clang 17.0.6
CGBuiltin.cpp
//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "ABIInfo.h"
#include "CGCUDARuntime.h"
#include "CGCXXABI.h"
#include "CGObjCRuntime.h"
#include "CGOpenCLRuntime.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "ConstantEmitter.h"
#include "PatternInit.h"
#include "TargetInfo.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OSLog.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsBPF.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsS390.h"
#include "llvm/IR/IntrinsicsVE.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/MatrixBuilder.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/TargetParser/AArch64TargetParser.h"
#include "llvm/TargetParser/X86TargetParser.h"
#include <optional>
#include <sstream>

using namespace clang;
using namespace CodeGen;
using namespace llvm;

static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
                             Align AlignmentInBytes) {
  ConstantInt *Byte;
  switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
  case LangOptions::TrivialAutoVarInitKind::Uninitialized:
    // Nothing to initialize.
    return;
  case LangOptions::TrivialAutoVarInitKind::Zero:
    Byte = CGF.Builder.getInt8(0x00);
    break;
  case LangOptions::TrivialAutoVarInitKind::Pattern: {
    llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
    Byte = llvm::dyn_cast<llvm::ConstantInt>(
        initializationPatternFor(CGF.CGM, Int8));
    break;
  }
  }
  if (CGF.CGM.stopAutoInit())
    return;
  auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
  I->addAnnotationMetadata("auto-init");
}
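
// Illustrative sketch (not part of the upstream file; the pattern byte value
// is an assumption about clang's pattern init): for a call such as
//
//   char *p = (char *)__builtin_alloca(n);  // -ftrivial-auto-var-init=pattern
//
// initializeAlloca follows the alloca with a memset of the pattern byte
// (0xAA for integer bytes), and the memset carries "auto-init" annotation
// metadata so the padding can be identified later.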

/// getBuiltinLibFunction - Given a builtin id for a function like
/// "__builtin_fabsf", return a Function* for "fabsf".
llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
                                                     unsigned BuiltinID) {
  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));

  // Get the name, skip over the __builtin_ prefix (if necessary).
  StringRef Name;
  GlobalDecl D(FD);

  // TODO: This list should be expanded or refactored after all GCC-compatible
  // std libcall builtins are implemented.
  static SmallDenseMap<unsigned, StringRef, 64> F128Builtins{
      {Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"},
      {Builtin::BI__builtin___printf_chk, "__printf_chkieee128"},
      {Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"},
      {Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"},
      {Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"},
      {Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"},
      {Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"},
      {Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"},
      {Builtin::BI__builtin_fprintf, "__fprintfieee128"},
      {Builtin::BI__builtin_printf, "__printfieee128"},
      {Builtin::BI__builtin_snprintf, "__snprintfieee128"},
      {Builtin::BI__builtin_sprintf, "__sprintfieee128"},
      {Builtin::BI__builtin_vfprintf, "__vfprintfieee128"},
      {Builtin::BI__builtin_vprintf, "__vprintfieee128"},
      {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"},
      {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"},
      {Builtin::BI__builtin_fscanf, "__fscanfieee128"},
      {Builtin::BI__builtin_scanf, "__scanfieee128"},
      {Builtin::BI__builtin_sscanf, "__sscanfieee128"},
      {Builtin::BI__builtin_vfscanf, "__vfscanfieee128"},
      {Builtin::BI__builtin_vscanf, "__vscanfieee128"},
      {Builtin::BI__builtin_vsscanf, "__vsscanfieee128"},
      {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},
  };

  // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit
  // IBM 'long double' (i.e. __ibm128). Map to the 'double' versions
  // if it is 64-bit 'long double' mode.
  static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{
      {Builtin::BI__builtin_frexpl, "frexp"},
      {Builtin::BI__builtin_ldexpl, "ldexp"},
      {Builtin::BI__builtin_modfl, "modf"},
  };

  // If the builtin has been declared explicitly with an assembler label,
  // use the mangled name. This differs from the plain label on platforms
  // that prefix labels.
  if (FD->hasAttr<AsmLabelAttr>())
    Name = getMangledName(D);
  else {
    // TODO: This mutation should also be applied to targets other than PPC,
    // once the backend supports IEEE 128-bit style libcalls.
    if (getTriple().isPPC64() &&
        &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
        F128Builtins.find(BuiltinID) != F128Builtins.end())
      Name = F128Builtins[BuiltinID];
    else if (getTriple().isOSAIX() &&
             &getTarget().getLongDoubleFormat() ==
                 &llvm::APFloat::IEEEdouble() &&
             AIXLongDouble64Builtins.find(BuiltinID) !=
                 AIXLongDouble64Builtins.end())
      Name = AIXLongDouble64Builtins[BuiltinID];
    else
      Name = Context.BuiltinInfo.getName(BuiltinID).substr(10);
  }

  llvm::FunctionType *Ty =
      cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));

  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
}
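
// Example of the mapping above (illustrative): a call to __builtin_fabsf
// resolves to the library function "fabsf" (the "__builtin_" prefix, 10
// characters, is dropped via substr(10)), while on PPC64 with IEEE-quad
// long double a call to __builtin_printf resolves to "__printfieee128".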

/// Emit the conversions required to turn the given value into an
/// integer of the given size.
static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
                        QualType T, llvm::IntegerType *IntType) {
  V = CGF.EmitToMemory(V, T);

  if (V->getType()->isPointerTy())
    return CGF.Builder.CreatePtrToInt(V, IntType);

  assert(V->getType() == IntType);
  return V;
}

static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
                          QualType T, llvm::Type *ResultType) {
  V = CGF.EmitFromMemory(V, T);

  if (ResultType->isPointerTy())
    return CGF.Builder.CreateIntToPtr(V, ResultType);

  assert(V->getType() == ResultType);
  return V;
}

static llvm::Value *CheckAtomicAlignment(CodeGenFunction &CGF,
                                         const CallExpr *E) {
  ASTContext &Ctx = CGF.getContext();
  Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
  unsigned Bytes = Ptr.getElementType()->isPointerTy()
                       ? Ctx.getTypeSizeInChars(Ctx.VoidPtrTy).getQuantity()
                       : Ptr.getElementType()->getScalarSizeInBits() / 8;
  unsigned Align = Ptr.getAlignment().getQuantity();
  if (Align % Bytes != 0) {
    DiagnosticsEngine &Diags = CGF.CGM.getDiags();
    Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned);
  }
  return Ptr.getPointer();
}

/// Utility to insert an atomic instruction based on Intrinsic::ID
/// and the expression node.
static Value *MakeBinaryAtomicValue(
    CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {

  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(
      T, E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CheckAtomicAlignment(CGF, E);
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
      llvm::IntegerType::get(CGF.getLLVMContext(),
                             CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType =
      llvm::PointerType::get(CGF.getLLVMContext(), AddrSpace);

  llvm::Value *Args[2];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);

  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
      Kind, Args[0], Args[1], Ordering);
  return EmitFromInt(CGF, Result, T, ValueType);
}
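
// Illustrative lowering (a minimal sketch, not from the upstream file): for
//
//   long old = __sync_fetch_and_add(&counter, 4);
//
// this helper emits roughly
//
//   %old = atomicrmw add ptr %counter, i64 4 seq_cst
//
// with EmitToInt/EmitFromInt handling pointer<->integer conversions so that
// pointer-typed operands can also go through an integer atomicrmw.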

static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
  Value *Address = CGF.EmitScalarExpr(E->getArg(1));

  // Convert the type of the pointer to a pointer to the stored type.
  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
  unsigned SrcAddrSpace = Address->getType()->getPointerAddressSpace();
  Value *BC = CGF.Builder.CreateBitCast(
      Address, llvm::PointerType::get(Val->getType(), SrcAddrSpace), "cast");
  LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
  LV.setNontemporal(true);
  CGF.EmitStoreOfScalar(Val, LV, false);
  return nullptr;
}

static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Address = CGF.EmitScalarExpr(E->getArg(0));

  LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
  LV.setNontemporal(true);
  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
}
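
// Illustrative lowering: __builtin_nontemporal_store(v, p) becomes an
// ordinary store tagged with !nontemporal metadata, e.g.
//
//   store i32 %v, ptr %p, align 4, !nontemporal !0
//
// which lets targets select streaming stores that bypass the cache.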

static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
                               llvm::AtomicRMWInst::BinOp Kind,
                               const CallExpr *E) {
  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
}

/// Utility to insert an atomic instruction based on Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
                                   llvm::AtomicRMWInst::BinOp Kind,
                                   const CallExpr *E,
                                   Instruction::BinaryOps Op,
                                   bool Invert = false) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(
      T, E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CheckAtomicAlignment(CGF, E);

  llvm::IntegerType *IntType = llvm::IntegerType::get(
      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));

  llvm::Value *Args[2];
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[0] = DestPtr;

  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
      Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
  if (Invert)
    Result =
        CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
                                llvm::ConstantInt::getAllOnesValue(IntType));
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}
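
// Illustrative lowering: __sync_add_and_fetch(p, v) must return the *new*
// value, but atomicrmw returns the old one, so the helper re-applies the
// operation:
//
//   %old = atomicrmw add ptr %p, i32 %v seq_cst
//   %new = add i32 %old, %v            ; handed back to the caller
//
// For the nand flavor, Invert is set and the re-applied result is
// additionally xor'ed with all-ones.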

/// Utility to insert an atomic cmpxchg instruction.
///
/// @param CGF The current codegen function.
/// @param E   Builtin call expression to convert to cmpxchg.
///            arg0 - address to operate on
///            arg1 - value to compare with
///            arg2 - new value
/// @param ReturnBool Specifies whether to return success flag of
///                   cmpxchg result or the old value.
///
/// @returns result of cmpxchg, according to ReturnBool
///
/// Note: In order to lower Microsoft's _InterlockedCompareExchange*
/// intrinsics, invoke the function EmitAtomicCmpXchgForMSIntrin.
static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
                                     bool ReturnBool) {
  QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
  llvm::Value *DestPtr = CheckAtomicAlignment(CGF, E);

  llvm::IntegerType *IntType = llvm::IntegerType::get(
      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));

  Value *Args[3];
  Args[0] = DestPtr;
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);

  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
      Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
      llvm::AtomicOrdering::SequentiallyConsistent);
  if (ReturnBool)
    // Extract boolean success flag and zext it to int.
    return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
                                  CGF.ConvertType(E->getType()));
  else
    // Extract old value and emit it using the same type as compare value.
    return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
                       ValueType);
}
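
// Illustrative usage: both GCC-style builtins funnel through this helper.
//
//   bool ok  = __sync_bool_compare_and_swap(p, expected, desired);
//   long old = __sync_val_compare_and_swap(p, expected, desired);
//
// Each emits a seq_cst cmpxchg; ReturnBool selects whether the i1 success
// flag (element 1 of the result pair) or the old value (element 0) is
// returned.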

/// This function should be invoked to emit atomic cmpxchg for Microsoft's
/// _InterlockedCompareExchange* intrinsics which have the following signature:
/// T _InterlockedCompareExchange(T volatile *Destination,
///                               T Exchange,
///                               T Comparand);
///
/// Whereas the llvm 'cmpxchg' instruction has the following syntax:
/// cmpxchg *Destination, Comparand, Exchange.
/// So we need to swap Comparand and Exchange when invoking
/// CreateAtomicCmpXchg. That is the reason we could not use the above utility
/// function MakeAtomicCmpXchgValue since it expects the arguments to be
/// already swapped.
static
Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
    AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(
      E->getType(), E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
                                                 E->getArg(1)->getType()));
  assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
                                                 E->getArg(2)->getType()));

  auto *Destination = CGF.EmitScalarExpr(E->getArg(0));
  auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
  auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));

  // For Release ordering, the failure ordering should be Monotonic.
  auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
                         AtomicOrdering::Monotonic :
                         SuccessOrdering;

  // The atomic instruction is marked volatile for consistency with MSVC. This
  // blocks the few atomics optimizations that LLVM has. If we want to optimize
  // _Interlocked* operations in the future, we will have to remove the
  // volatile marker.
  auto *Result = CGF.Builder.CreateAtomicCmpXchg(
      Destination, Comparand, Exchange,
      SuccessOrdering, FailureOrdering);
  Result->setVolatile(true);
  return CGF.Builder.CreateExtractValue(Result, 0);
}
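
// Usage sketch: _InterlockedCompareExchange(&x, Exchange, Comparand) returns
// the value x held before the operation, whether or not the exchange
// happened, which is why only element 0 of the cmpxchg result pair is
// extracted above.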

// 64-bit Microsoft platforms support 128-bit cmpxchg operations. They are
// prototyped like this:
//
// unsigned char _InterlockedCompareExchange128...(
//     __int64 volatile * _Destination,
//     __int64 _ExchangeHigh,
//     __int64 _ExchangeLow,
//     __int64 * _ComparandResult);
static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
                                              const CallExpr *E,
                                              AtomicOrdering SuccessOrdering) {
  assert(E->getNumArgs() == 4);
  llvm::Value *Destination = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
  llvm::Value *ComparandPtr = CGF.EmitScalarExpr(E->getArg(3));

  assert(Destination->getType()->isPointerTy());
  assert(!ExchangeHigh->getType()->isPointerTy());
  assert(!ExchangeLow->getType()->isPointerTy());
  assert(ComparandPtr->getType()->isPointerTy());

  // For Release ordering, the failure ordering should be Monotonic.
  auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
                             ? AtomicOrdering::Monotonic
                             : SuccessOrdering;

  // Convert to i128 pointers and values.
  llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
  Address ComparandResult(ComparandPtr, Int128Ty,
                          CGF.getContext().toCharUnitsFromBits(64));

  // (((i128)hi) << 64) | ((i128)lo)
  ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
  ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
  ExchangeHigh =
      CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
  llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);

  // Load the comparand for the instruction.
  llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandResult);

  auto *CXI = CGF.Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
                                              SuccessOrdering, FailureOrdering);

  // The atomic instruction is marked volatile for consistency with MSVC. This
  // blocks the few atomics optimizations that LLVM has. If we want to optimize
  // _Interlocked* operations in the future, we will have to remove the
  // volatile marker.
  CXI->setVolatile(true);

  // Store the result as an outparameter.
  CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
                          ComparandResult);

  // Get the success boolean and zero extend it to i8.
  Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
  return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
}
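
// Usage sketch (illustrative MSVC-style source):
//
//   __int64 cmp[2] = {lo0, hi0};
//   unsigned char ok = _InterlockedCompareExchange128(dst, hi, lo, cmp);
//
// The two 64-bit halves are fused into one i128 as (((i128)hi) << 64) | lo,
// the old value is written back through the comparand pointer, and the i1
// success flag is returned zero-extended to i8.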

static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
  assert(E->getArg(0)->getType()->isPointerType());

  auto *IntTy = CGF.ConvertType(E->getType());
  auto *Result = CGF.Builder.CreateAtomicRMW(
      AtomicRMWInst::Add,
      CGF.EmitScalarExpr(E->getArg(0)),
      ConstantInt::get(IntTy, 1),
      Ordering);
  return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
}

static Value *EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E,
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
  assert(E->getArg(0)->getType()->isPointerType());

  auto *IntTy = CGF.ConvertType(E->getType());
  auto *Result = CGF.Builder.CreateAtomicRMW(
      AtomicRMWInst::Sub,
      CGF.EmitScalarExpr(E->getArg(0)),
      ConstantInt::get(IntTy, 1),
      Ordering);
  return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
}
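
// Illustrative lowering: _InterlockedIncrement returns the *incremented*
// value, while atomicrmw add returns the value before the addition, hence
// the trailing add (and, symmetrically, sub) of 1:
//
//   %old = atomicrmw add ptr %p, i32 1 seq_cst
//   %new = add i32 %old, 1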

// Build a plain volatile load.
static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
  CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
  llvm::Type *ITy =
      llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
  llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);
  Load->setVolatile(true);
  return Load;
}

// Build a plain volatile store.
static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
  Value *Value = CGF.EmitScalarExpr(E->getArg(1));
  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
  CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
  llvm::StoreInst *Store =
      CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
  Store->setVolatile(true);
  return Store;
}
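
// Illustrative lowering: __iso_volatile_load32(p) becomes a plain volatile
// load of the matching integer width, with no atomic ordering attached:
//
//   %v = load volatile i32, ptr %p, align 4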

// Emit a simple mangled intrinsic that has 1 argument and a return type
// matching the argument type. Depending on mode, this may be a constrained
// floating-point intrinsic.
static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                const CallExpr *E, unsigned IntrinsicID,
                                unsigned ConstrainedIntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
    return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
  } else {
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
    return CGF.Builder.CreateCall(F, Src0);
  }
}

// Emit an intrinsic that has 2 operands of the same type as its result.
// Depending on mode, this may be a constrained floating-point intrinsic.
static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                const CallExpr *E, unsigned IntrinsicID,
                                unsigned ConstrainedIntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
    return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
  } else {
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
    return CGF.Builder.CreateCall(F, { Src0, Src1 });
  }
}

// Has second type mangled argument.
static Value *emitBinaryExpMaybeConstrainedFPBuiltin(
    CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID,
    llvm::Intrinsic::ID ConstrainedIntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
                                       {Src0->getType(), Src1->getType()});
    return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
  }

  Function *F =
      CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
  return CGF.Builder.CreateCall(F, {Src0, Src1});
}

// Emit an intrinsic that has 3 operands of the same type as its result.
// Depending on mode, this may be a constrained floating-point intrinsic.
static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                 const CallExpr *E, unsigned IntrinsicID,
                                 unsigned ConstrainedIntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
    return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
  } else {
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
    return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
  }
}

// Emit an intrinsic where all operands are of the same type as the result.
// Depending on mode, this may be a constrained floating-point intrinsic.
static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                                unsigned IntrinsicID,
                                                unsigned ConstrainedIntrinsicID,
                                                llvm::Type *Ty,
                                                ArrayRef<Value *> Args) {
  Function *F;
  if (CGF.Builder.getIsFPConstrained())
    F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
  else
    F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);

  if (CGF.Builder.getIsFPConstrained())
    return CGF.Builder.CreateConstrainedFPCall(F, Args);
  else
    return CGF.Builder.CreateCall(F, Args);
}
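
// Illustrative example: with strict floating-point semantics enabled (for
// instance -ffp-exception-behavior=strict), a call like __builtin_sqrt(x) is
// emitted through the constrained variant,
//
//   call double @llvm.experimental.constrained.sqrt.f64(double %x, ...)
//
// instead of the plain @llvm.sqrt.f64, so rounding-mode and exception
// metadata are preserved.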

// Emit a simple mangled intrinsic that has 1 argument and a return type
// matching the argument type.
static Value *emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E,
                               unsigned IntrinsicID,
                               llvm::StringRef Name = "") {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, Src0, Name);
}

// Emit an intrinsic that has 2 operands of the same type as its result.
static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
                                const CallExpr *E,
                                unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, { Src0, Src1 });
}

// Emit an intrinsic that has 3 operands of the same type as its result.
static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
                                 const CallExpr *E,
                                 unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
}

// Emit an intrinsic that has 1 float or double operand, and 1 integer.
static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
                               const CallExpr *E,
                               unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, {Src0, Src1});
}

// Emit an intrinsic that has overloaded integer result and fp operand.
static Value *
emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E,
                                        unsigned IntrinsicID,
                                        unsigned ConstrainedIntrinsicID) {
  llvm::Type *ResultType = CGF.ConvertType(E->getType());
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
                                       {ResultType, Src0->getType()});
    return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
  } else {
    Function *F =
        CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});
    return CGF.Builder.CreateCall(F, Src0);
  }
}

static Value *emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E,
                               llvm::Intrinsic::ID IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  QualType IntPtrTy = E->getArg(1)->getType()->getPointeeType();
  llvm::Type *IntTy = CGF.ConvertType(IntPtrTy);
  llvm::Function *F =
      CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), IntTy});
  llvm::Value *Call = CGF.Builder.CreateCall(F, Src0);

  llvm::Value *Exp = CGF.Builder.CreateExtractValue(Call, 1);
  LValue LV = CGF.MakeNaturalAlignAddrLValue(Src1, IntPtrTy);
  CGF.EmitStoreOfScalar(Exp, LV);

  return CGF.Builder.CreateExtractValue(Call, 0);
}
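
// Illustrative lowering: __builtin_frexp(x, &e) maps onto the llvm.frexp
// intrinsic, which returns a {fraction, exponent} pair; element 1 is stored
// through the second argument and element 0 becomes the call's result.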

/// EmitFAbs - Emit a call to @llvm.fabs().
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
  Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
  llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
  Call->setDoesNotAccessMemory();
  return Call;
}

/// Emit the computation of the sign bit for a floating point value. Returns
/// the i1 sign bit value.
static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
  LLVMContext &C = CGF.CGM.getLLVMContext();

  llvm::Type *Ty = V->getType();
  int Width = Ty->getPrimitiveSizeInBits();
  llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
  V = CGF.Builder.CreateBitCast(V, IntTy);
  if (Ty->isPPC_FP128Ty()) {
    // We want the sign bit of the higher-order double. The bitcast we just
    // did works as if the double-double was stored to memory and then
    // read as an i128. The "store" will put the higher-order double in the
    // lower address in both little- and big-Endian modes, but the "load"
    // will treat those bits as a different part of the i128: the low bits in
    // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
    // we need to shift the high bits down to the low before truncating.
    Width >>= 1;
    if (CGF.getTarget().isBigEndian()) {
      Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
      V = CGF.Builder.CreateLShr(V, ShiftCst);
    }
    // We are truncating value in order to extract the higher-order
    // double, which we will be using to extract the sign from.
    IntTy = llvm::IntegerType::get(C, Width);
    V = CGF.Builder.CreateTrunc(V, IntTy);
  }
  Value *Zero = llvm::Constant::getNullValue(IntTy);
  return CGF.Builder.CreateICmpSLT(V, Zero);
}

static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
                              const CallExpr *E, llvm::Constant *calleeValue) {
  CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
  return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
}

/// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
/// depending on IntrinsicID.
///
/// \arg CGF The current codegen function.
/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
/// \arg X The first argument to the llvm.*.with.overflow.*.
/// \arg Y The second argument to the llvm.*.with.overflow.*.
/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
/// \returns The result (i.e. sum/product) returned by the intrinsic.
static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
                                          const llvm::Intrinsic::ID IntrinsicID,
                                          llvm::Value *X, llvm::Value *Y,
                                          llvm::Value *&Carry) {
  // Make sure we have integers of the same width.
  assert(X->getType() == Y->getType() &&
         "Arguments must be the same type. (Did you forget to make sure both "
         "arguments have the same integer width?)");

  Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
  llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
  return CGF.Builder.CreateExtractValue(Tmp, 0);
}
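
// Illustrative lowering: for
//
//   bool ov = __builtin_sadd_overflow(a, b, &sum);
//
// this helper is invoked with llvm.sadd.with.overflow, which yields a
// {result, i1 overflow} pair; the result is stored through the out-pointer
// and the i1 carry becomes the builtin's return value.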

static llvm::Value *EmitRangedBuiltin(CodeGenFunction &CGF,
                                      unsigned IntrinsicID,
                                      int low, int high) {
  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
  llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
  llvm::Instruction *Call = CGF.Builder.CreateCall(F);
  Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
  Call->setMetadata(llvm::LLVMContext::MD_noundef,
                    llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
  return Call;
}

namespace {
  struct WidthAndSignedness {
    unsigned Width;
    bool Signed;
  };
}

static WidthAndSignedness
getIntegerWidthAndSignedness(const clang::ASTContext &context,
                             const clang::QualType Type) {
  assert(Type->isIntegerType() && "Given type is not an integer.");
  unsigned Width = Type->isBooleanType()  ? 1
                   : Type->isBitIntType() ? context.getIntWidth(Type)
                                          : context.getTypeInfo(Type).Width;
  bool Signed = Type->isSignedIntegerType();
  return {Width, Signed};
}

// Given one or more integer types, this function produces an integer type that
// encompasses them: any value in one of the given types could be expressed in
// the encompassing type.
static struct WidthAndSignedness
EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
  assert(Types.size() > 0 && "Empty list of types.");

  // If any of the given types is signed, we must return a signed type.
  bool Signed = false;
  for (const auto &Type : Types) {
    Signed |= Type.Signed;
  }

  // The encompassing type must have a width greater than or equal to the width
  // of the specified types. Additionally, if the encompassing type is signed,
  // its width must be strictly greater than the width of any unsigned types
  // given.
  unsigned Width = 0;
  for (const auto &Type : Types) {
    unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
    if (Width < MinWidth) {
      Width = MinWidth;
    }
  }

  return {Width, Signed};
}
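
// Worked example (illustrative): for {unsigned 32-bit, signed 16-bit} the
// result must be signed (one input is signed), and the unsigned 32-bit input
// then needs one extra bit, so the encompassing type is a signed 33-bit
// integer: {33, true}.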

Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
  llvm::Type *DestType = Int8PtrTy;
  if (ArgValue->getType() != DestType)
    ArgValue =
        Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());

  Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
  return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
}

/// Checks if using the result of __builtin_object_size(p, @p From) in place of
/// __builtin_object_size(p, @p To) is correct.
static bool areBOSTypesCompatible(int From, int To) {
  // Note: Our __builtin_object_size implementation currently treats Type=0 and
  // Type=2 identically. Encoding this implementation detail here may make
  // improving __builtin_object_size difficult in the future, so it's omitted.
  return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
}

static llvm::Value *
getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
  return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
}

llvm::Value *
CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
                                                 llvm::IntegerType *ResType,
                                                 llvm::Value *EmittedE,
                                                 bool IsDynamic) {
  uint64_t ObjectSize;
  if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
    return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
  return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
}

/// Returns a Value corresponding to the size of the given expression.
/// This Value may be either of the following:
///   - An llvm::Argument (if E is a param with the pass_object_size attribute
///     on it)
///   - A call to the @llvm.objectsize intrinsic
///
/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
/// and we wouldn't otherwise try to reference a pass_object_size parameter,
/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
llvm::Value *
CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
                                       llvm::IntegerType *ResType,
                                       llvm::Value *EmittedE, bool IsDynamic) {
  // We need to reference an argument if the pointer is a parameter with the
  // pass_object_size attribute.
  if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
    auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
    auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
    if (Param != nullptr && PS != nullptr &&
        areBOSTypesCompatible(PS->getType(), Type)) {
      auto Iter = SizeArguments.find(Param);
      assert(Iter != SizeArguments.end());

      const ImplicitParamDecl *D = Iter->second;
      auto DIter = LocalDeclMap.find(D);
      assert(DIter != LocalDeclMap.end());

      return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
                              getContext().getSizeType(), E->getBeginLoc());
    }
  }

  // LLVM can't handle Type=3 appropriately, and __builtin_object_size
  // shouldn't evaluate E for side-effects. In either case, we shouldn't lower
  // to @llvm.objectsize.
  if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
    return getDefaultBuiltinObjectSizeResult(Type, ResType);

  Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
  assert(Ptr->getType()->isPointerTy() &&
         "Non-pointer passed to __builtin_object_size?");

  Function *F =
      CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});

  // LLVM only supports 0 and 2, so make sure that we pass along that as a
  // boolean.
  Value *Min = Builder.getInt1((Type & 2) != 0);
  // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
  Value *NullIsUnknown = Builder.getTrue();
  Value *Dynamic = Builder.getInt1(IsDynamic);
  return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
}
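
// Illustrative lowering: __builtin_object_size(p, 2) (minimum remaining
// bytes) becomes
//
//   call i64 @llvm.objectsize.i64.p0(ptr %p, i1 true, i1 true, i1 false)
//
// where the flags are min=true, nullunknown=true, and dynamic=false.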

namespace {
/// A struct to generically describe a bit test intrinsic.
struct BitTest {
  enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
  enum InterlockingKind : uint8_t {
    Unlocked,
    Sequential,
    Acquire,
    Release,
    NoFence
  };

  ActionKind Action;
  InterlockingKind Interlocking;
  bool Is64Bit;

  static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
};
} // namespace

BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
  switch (BuiltinID) {
  // Main portable variants.
  case Builtin::BI_bittest:
    return {TestOnly, Unlocked, false};
  case Builtin::BI_bittestandcomplement:
    return {Complement, Unlocked, false};
  case Builtin::BI_bittestandreset:
    return {Reset, Unlocked, false};
  case Builtin::BI_bittestandset:
    return {Set, Unlocked, false};
  case Builtin::BI_interlockedbittestandreset:
    return {Reset, Sequential, false};
  case Builtin::BI_interlockedbittestandset:
    return {Set, Sequential, false};

  // X86-specific 64-bit variants.
  case Builtin::BI_bittest64:
    return {TestOnly, Unlocked, true};
  case Builtin::BI_bittestandcomplement64:
    return {Complement, Unlocked, true};
  case Builtin::BI_bittestandreset64:
    return {Reset, Unlocked, true};
  case Builtin::BI_bittestandset64:
    return {Set, Unlocked, true};
  case Builtin::BI_interlockedbittestandreset64:
    return {Reset, Sequential, true};
  case Builtin::BI_interlockedbittestandset64:
    return {Set, Sequential, true};

  // ARM/AArch64-specific ordering variants.
  case Builtin::BI_interlockedbittestandset_acq:
    return {Set, Acquire, false};
  case Builtin::BI_interlockedbittestandset_rel:
    return {Set, Release, false};
  case Builtin::BI_interlockedbittestandset_nf:
    return {Set, NoFence, false};
  case Builtin::BI_interlockedbittestandreset_acq:
    return {Reset, Acquire, false};
  case Builtin::BI_interlockedbittestandreset_rel:
    return {Reset, Release, false};
  case Builtin::BI_interlockedbittestandreset_nf:
    return {Reset, NoFence, false};
  }
  llvm_unreachable("expected only bittest intrinsics");
}

static char bitActionToX86BTCode(BitTest::ActionKind A) {
  switch (A) {
  case BitTest::TestOnly:   return '\0';
  case BitTest::Complement: return 'c';
  case BitTest::Reset:      return 'r';
  case BitTest::Set:        return 's';
  }
  llvm_unreachable("invalid action");
}

static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
                                            BitTest BT,
                                            const CallExpr *E, Value *BitBase,
                                            Value *BitPos) {
  char Action = bitActionToX86BTCode(BT.Action);
  char SizeSuffix = BT.Is64Bit ? 'q' : 'l';

  // Build the assembly.
  SmallString<64> Asm;
  raw_svector_ostream AsmOS(Asm);
  if (BT.Interlocking != BitTest::Unlocked)
    AsmOS << "lock ";
  AsmOS << "bt";
  if (Action)
    AsmOS << Action;
  AsmOS << SizeSuffix << " $2, ($1)";

  // Build the constraints. FIXME: We should support immediates when possible.
  std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";
  std::string_view MachineClobbers = CGF.getTarget().getClobbers();
  if (!MachineClobbers.empty()) {
    Constraints += ',';
    Constraints += MachineClobbers;
  }
  llvm::IntegerType *IntType = llvm::IntegerType::get(
      CGF.getLLVMContext(),
      CGF.getContext().getTypeSize(E->getArg(1)->getType()));
  llvm::Type *PtrType = llvm::PointerType::getUnqual(CGF.getLLVMContext());
  llvm::FunctionType *FTy =
      llvm::FunctionType::get(CGF.Int8Ty, {PtrType, IntType}, false);

  llvm::InlineAsm *IA =
      llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
  return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
}
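
// Illustrative output: for _interlockedbittestandset64 the inline-asm string
// assembled above is
//
//   lock btsq $2, ($1)
//
// with constraints "={@ccc},r,r,~{cc},~{memory}", i.e. the carry flag is
// returned in an i8 and the operation is a locked read-modify-write.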

static llvm::AtomicOrdering
getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
  switch (I) {
  case BitTest::Unlocked:   return llvm::AtomicOrdering::NotAtomic;
  case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
  case BitTest::Acquire:    return llvm::AtomicOrdering::Acquire;
  case BitTest::Release:    return llvm::AtomicOrdering::Release;
  case BitTest::NoFence:    return llvm::AtomicOrdering::Monotonic;
  }
  llvm_unreachable("invalid interlocking");
}

/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
/// bits and a bit position and read and optionally modify the bit at that
/// position. The position index can be arbitrarily large, i.e. it can be
/// larger than 31 or 63, so we need an indexed load in the general case.
static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
                                         unsigned BuiltinID,
                                         const CallExpr *E) {
  Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
  Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));

  BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);

  // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
  // indexing operation internally. Use them if possible.
  if (CGF.getTarget().getTriple().isX86())
    return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);

  // Otherwise, use generic code to load one byte and test the bit. Use all
  // but the bottom three bits as the array index, and the bottom three bits
  // to form a mask.
  // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
  Value *ByteIndex = CGF.Builder.CreateAShr(
      BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
  Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
  Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
                                                 ByteIndex, "bittest.byteaddr"),
                   CGF.Int8Ty, CharUnits::One());
  Value *PosLow =
      CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
                            llvm::ConstantInt::get(CGF.Int8Ty, 0x7));

  // The updating instructions will need a mask.
  Value *Mask = nullptr;
  if (BT.Action != BitTest::TestOnly) {
    Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
                                 "bittest.mask");
  }

  // Check the action and ordering of the interlocked intrinsics.
  llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);

  Value *OldByte = nullptr;
  if (Ordering != llvm::AtomicOrdering::NotAtomic) {
    // Emit a combined atomicrmw load/store operation for the interlocked
    // intrinsics.
    llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
    if (BT.Action == BitTest::Reset) {
      Mask = CGF.Builder.CreateNot(Mask);
      RMWOp = llvm::AtomicRMWInst::And;
    }
    OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr.getPointer(), Mask,
                                          Ordering);
  } else {
    // Emit a plain load for the non-interlocked intrinsics.
    OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
    Value *NewByte = nullptr;
    switch (BT.Action) {
    case BitTest::TestOnly:
      // Don't store anything.
      break;
    case BitTest::Complement:
      NewByte = CGF.Builder.CreateXor(OldByte, Mask);
      break;
    case BitTest::Reset:
      NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
      break;
    case BitTest::Set:
      NewByte = CGF.Builder.CreateOr(OldByte, Mask);
      break;
    }
    if (NewByte)
      CGF.Builder.CreateStore(NewByte, ByteAddr);
  }

  // However we loaded the old byte, either by plain load or atomicrmw, shift
  // the bit into the low position and mask it to 0 or 1.
  Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
  return CGF.Builder.CreateAnd(
      ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
}
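
// Illustrative C equivalent of the generic (non-x86, non-interlocked) path,
// a minimal sketch:
//
//   unsigned char bittest_generic(const unsigned char *base, long pos) {
//     unsigned char byte = base[pos >> 3];   // indexed byte load
//     return (byte >> (pos & 0x7)) & 1;      // extract the tested bit
//   }
//
// For the interlocked variants the byte update is performed with an
// atomicrmw or/and instead of a plain load and store.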

static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
                                                unsigned BuiltinID,
                                                const CallExpr *E) {
  Value *Addr = CGF.EmitScalarExpr(E->getArg(0));

  SmallString<64> Asm;
  raw_svector_ostream AsmOS(Asm);
  llvm::IntegerType *RetType = CGF.Int32Ty;

  switch (BuiltinID) {
  case clang::PPC::BI__builtin_ppc_ldarx:
    AsmOS << "ldarx ";
    RetType = CGF.Int64Ty;
    break;
  case clang::PPC::BI__builtin_ppc_lwarx:
    AsmOS << "lwarx ";
    RetType = CGF.Int32Ty;
    break;
  case clang::PPC::BI__builtin_ppc_lharx:
    AsmOS << "lharx ";
    RetType = CGF.Int16Ty;
    break;
  case clang::PPC::BI__builtin_ppc_lbarx:
    AsmOS << "lbarx ";
    RetType = CGF.Int8Ty;
    break;
  default:
    llvm_unreachable("Expected only PowerPC load reserve intrinsics");
  }

  AsmOS << "$0, ${1:y}";

  std::string Constraints = "=r,*Z,~{memory}";
  std::string_view MachineClobbers = CGF.getTarget().getClobbers();
  if (!MachineClobbers.empty()) {
    Constraints += ',';
    Constraints += MachineClobbers;
  }

  llvm::Type *PtrType = llvm::PointerType::getUnqual(CGF.getLLVMContext());
  llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false);

  llvm::InlineAsm *IA =
      llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
  llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr});
  CI->addParamAttr(
      0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));
  return CI;
}
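
// Illustrative output: __builtin_ppc_lwarx(p) becomes the inline asm
//
//   lwarx $0, ${1:y}
//
// with constraints "=r,*Z,~{memory}": a load-and-reserve whose result comes
// back in a register and whose memory operand carries an ElementType
// attribute matching the loaded width.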

namespace {
enum class MSVCSetJmpKind {
  _setjmpex,
  _setjmp3,
  _setjmp
};
}

/// MSVC handles setjmp a bit differently on different platforms. On every
/// architecture except 32-bit x86, the frame address is passed. On x86, extra
/// parameters can be passed as variadic arguments, but we always pass none.
static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
                               const CallExpr *E) {
  llvm::Value *Arg1 = nullptr;
  llvm::Type *Arg1Ty = nullptr;
  StringRef Name;
  bool IsVarArg = false;
  if (SJKind == MSVCSetJmpKind::_setjmp3) {
    Name = "_setjmp3";
    Arg1Ty = CGF.Int32Ty;
    Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
    IsVarArg = true;
  } else {
    Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
    Arg1Ty = CGF.Int8PtrTy;
    if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
      Arg1 = CGF.Builder.CreateCall(
          CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
    } else
      Arg1 = CGF.Builder.CreateCall(
          CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
          llvm::ConstantInt::get(CGF.Int32Ty, 0));
  }

  // Mark the call site and declaration with ReturnsTwice.
  llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
  llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
      CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
      llvm::Attribute::ReturnsTwice);
  llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
      llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
      ReturnsTwiceAttr, /*Local=*/true);

  llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
      CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
  llvm::Value *Args[] = {Buf, Arg1};
  llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
  CB->setAttributes(ReturnsTwiceAttr);
  return RValue::get(CB);
}

// Many MSVC builtins are available on x64, ARM and AArch64; to avoid
// repeating code, we handle them here.

static std::optional<CodeGenFunction::MSVCIntrin>
translateArmToMsvcIntrin(unsigned BuiltinID) {
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  switch (BuiltinID) {
  default:
    return std::nullopt;
  case clang::ARM::BI_BitScanForward:
  case clang::ARM::BI_BitScanForward64:
    return MSVCIntrin::_BitScanForward;
  case clang::ARM::BI_BitScanReverse:
  case clang::ARM::BI_BitScanReverse64:
    return MSVCIntrin::_BitScanReverse;
  case clang::ARM::BI_InterlockedAnd64:
    return MSVCIntrin::_InterlockedAnd;
  case clang::ARM::BI_InterlockedExchange64:
    return MSVCIntrin::_InterlockedExchange;
  case clang::ARM::BI_InterlockedExchangeAdd64:
    return MSVCIntrin::_InterlockedExchangeAdd;
  case clang::ARM::BI_InterlockedExchangeSub64:
    return MSVCIntrin::_InterlockedExchangeSub;
  case clang::ARM::BI_InterlockedOr64:
    return MSVCIntrin::_InterlockedOr;
  case clang::ARM::BI_InterlockedXor64:
    return MSVCIntrin::_InterlockedXor;
  case clang::ARM::BI_InterlockedDecrement64:
    return MSVCIntrin::_InterlockedDecrement;
  case clang::ARM::BI_InterlockedIncrement64:
    return MSVCIntrin::_InterlockedIncrement;
  case clang::ARM::BI_InterlockedExchangeAdd8_acq:
  case clang::ARM::BI_InterlockedExchangeAdd16_acq:
  case clang::ARM::BI_InterlockedExchangeAdd_acq:
  case clang::ARM::BI_InterlockedExchangeAdd64_acq:
    return MSVCIntrin::_InterlockedExchangeAdd_acq;
  case clang::ARM::BI_InterlockedExchangeAdd8_rel:
  case clang::ARM::BI_InterlockedExchangeAdd16_rel:
  case clang::ARM::BI_InterlockedExchangeAdd_rel:
  case clang::ARM::BI_InterlockedExchangeAdd64_rel:
    return MSVCIntrin::_InterlockedExchangeAdd_rel;
  case clang::ARM::BI_InterlockedExchangeAdd8_nf:
  case clang::ARM::BI_InterlockedExchangeAdd16_nf:
  case clang::ARM::BI_InterlockedExchangeAdd_nf:
  case clang::ARM::BI_InterlockedExchangeAdd64_nf:
    return MSVCIntrin::_InterlockedExchangeAdd_nf;
  case clang::ARM::BI_InterlockedExchange8_acq:
  case clang::ARM::BI_InterlockedExchange16_acq:
  case clang::ARM::BI_InterlockedExchange_acq:
  case clang::ARM::BI_InterlockedExchange64_acq:
    return MSVCIntrin::_InterlockedExchange_acq;
  case clang::ARM::BI_InterlockedExchange8_rel:
  case clang::ARM::BI_InterlockedExchange16_rel:
  case clang::ARM::BI_InterlockedExchange_rel:
  case clang::ARM::BI_InterlockedExchange64_rel:
    return MSVCIntrin::_InterlockedExchange_rel;
  case clang::ARM::BI_InterlockedExchange8_nf:
  case clang::ARM::BI_InterlockedExchange16_nf:
  case clang::ARM::BI_InterlockedExchange_nf:
  case clang::ARM::BI_InterlockedExchange64_nf:
    return MSVCIntrin::_InterlockedExchange_nf;
  case clang::ARM::BI_InterlockedCompareExchange8_acq:
  case clang::ARM::BI_InterlockedCompareExchange16_acq:
  case clang::ARM::BI_InterlockedCompareExchange_acq:
  case clang::ARM::BI_InterlockedCompareExchange64_acq:
    return MSVCIntrin::_InterlockedCompareExchange_acq;
  case clang::ARM::BI_InterlockedCompareExchange8_rel:
  case clang::ARM::BI_InterlockedCompareExchange16_rel:
  case clang::ARM::BI_InterlockedCompareExchange_rel:
  case clang::ARM::BI_InterlockedCompareExchange64_rel:
    return MSVCIntrin::_InterlockedCompareExchange_rel;
  case clang::ARM::BI_InterlockedCompareExchange8_nf:
  case clang::ARM::BI_InterlockedCompareExchange16_nf:
  case clang::ARM::BI_InterlockedCompareExchange_nf:
  case clang::ARM::BI_InterlockedCompareExchange64_nf:
    return MSVCIntrin::_InterlockedCompareExchange_nf;
  case clang::ARM::BI_InterlockedOr8_acq:
  case clang::ARM::BI_InterlockedOr16_acq:
  case clang::ARM::BI_InterlockedOr_acq:
  case clang::ARM::BI_InterlockedOr64_acq:
    return MSVCIntrin::_InterlockedOr_acq;
  case clang::ARM::BI_InterlockedOr8_rel:
  case clang::ARM::BI_InterlockedOr16_rel:
  case clang::ARM::BI_InterlockedOr_rel:
  case clang::ARM::BI_InterlockedOr64_rel:
    return MSVCIntrin::_InterlockedOr_rel;
  case clang::ARM::BI_InterlockedOr8_nf:
  case clang::ARM::BI_InterlockedOr16_nf:
  case clang::ARM::BI_InterlockedOr_nf:
  case clang::ARM::BI_InterlockedOr64_nf:
    return MSVCIntrin::_InterlockedOr_nf;
  case clang::ARM::BI_InterlockedXor8_acq:
  case clang::ARM::BI_InterlockedXor16_acq:
  case clang::ARM::BI_InterlockedXor_acq:
  case clang::ARM::BI_InterlockedXor64_acq:
    return MSVCIntrin::_InterlockedXor_acq;
  case clang::ARM::BI_InterlockedXor8_rel:
  case clang::ARM::BI_InterlockedXor16_rel:
  case clang::ARM::BI_InterlockedXor_rel:
  case clang::ARM::BI_InterlockedXor64_rel:
    return MSVCIntrin::_InterlockedXor_rel;
  case clang::ARM::BI_InterlockedXor8_nf:
  case clang::ARM::BI_InterlockedXor16_nf:
  case clang::ARM::BI_InterlockedXor_nf:
  case clang::ARM::BI_InterlockedXor64_nf:
    return MSVCIntrin::_InterlockedXor_nf;
  case clang::ARM::BI_InterlockedAnd8_acq:
  case clang::ARM::BI_InterlockedAnd16_acq:
  case clang::ARM::BI_InterlockedAnd_acq:
  case clang::ARM::BI_InterlockedAnd64_acq:
    return MSVCIntrin::_InterlockedAnd_acq;
  case clang::ARM::BI_InterlockedAnd8_rel:
  case clang::ARM::BI_InterlockedAnd16_rel:
  case clang::ARM::BI_InterlockedAnd_rel:
  case clang::ARM::BI_InterlockedAnd64_rel:
    return MSVCIntrin::_InterlockedAnd_rel;
  case clang::ARM::BI_InterlockedAnd8_nf:
  case clang::ARM::BI_InterlockedAnd16_nf:
  case clang::ARM::BI_InterlockedAnd_nf:
  case clang::ARM::BI_InterlockedAnd64_nf:
    return MSVCIntrin::_InterlockedAnd_nf;
  case clang::ARM::BI_InterlockedIncrement16_acq:
  case clang::ARM::BI_InterlockedIncrement_acq:
  case clang::ARM::BI_InterlockedIncrement64_acq:
    return MSVCIntrin::_InterlockedIncrement_acq;
  case clang::ARM::BI_InterlockedIncrement16_rel:
  case clang::ARM::BI_InterlockedIncrement_rel:
  case clang::ARM::BI_InterlockedIncrement64_rel:
    return MSVCIntrin::_InterlockedIncrement_rel;
  case clang::ARM::BI_InterlockedIncrement16_nf:
  case clang::ARM::BI_InterlockedIncrement_nf:
  case clang::ARM::BI_InterlockedIncrement64_nf:
    return MSVCIntrin::_InterlockedIncrement_nf;
  case clang::ARM::BI_InterlockedDecrement16_acq:
  case clang::ARM::BI_InterlockedDecrement_acq:
  case clang::ARM::BI_InterlockedDecrement64_acq:
    return MSVCIntrin::_InterlockedDecrement_acq;
  case clang::ARM::BI_InterlockedDecrement16_rel:
  case clang::ARM::BI_InterlockedDecrement_rel:
  case clang::ARM::BI_InterlockedDecrement64_rel:
    return MSVCIntrin::_InterlockedDecrement_rel;
  case clang::ARM::BI_InterlockedDecrement16_nf:
  case clang::ARM::BI_InterlockedDecrement_nf:
  case clang::ARM::BI_InterlockedDecrement64_nf:
    return MSVCIntrin::_InterlockedDecrement_nf;
  }
  llvm_unreachable("must return from switch");
}

static std::optional<CodeGenFunction::MSVCIntrin>
translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  switch (BuiltinID) {
  default:
    return std::nullopt;
  case clang::AArch64::BI_BitScanForward:
  case clang::AArch64::BI_BitScanForward64:
    return MSVCIntrin::_BitScanForward;
  case clang::AArch64::BI_BitScanReverse:
  case clang::AArch64::BI_BitScanReverse64:
    return MSVCIntrin::_BitScanReverse;
  case clang::AArch64::BI_InterlockedAnd64:
    return MSVCIntrin::_InterlockedAnd;
  case clang::AArch64::BI_InterlockedExchange64:
    return MSVCIntrin::_InterlockedExchange;
  case clang::AArch64::BI_InterlockedExchangeAdd64:
    return MSVCIntrin::_InterlockedExchangeAdd;
  case clang::AArch64::BI_InterlockedExchangeSub64:
    return MSVCIntrin::_InterlockedExchangeSub;
  case clang::AArch64::BI_InterlockedOr64:
    return MSVCIntrin::_InterlockedOr;
  case clang::AArch64::BI_InterlockedXor64:
    return MSVCIntrin::_InterlockedXor;
  case clang::AArch64::BI_InterlockedDecrement64:
    return MSVCIntrin::_InterlockedDecrement;
  case clang::AArch64::BI_InterlockedIncrement64:
    return MSVCIntrin::_InterlockedIncrement;
  case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
    return MSVCIntrin::_InterlockedExchangeAdd_acq;
  case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
    return MSVCIntrin::_InterlockedExchangeAdd_rel;
  case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
    return MSVCIntrin::_InterlockedExchangeAdd_nf;
  case clang::AArch64::BI_InterlockedExchange8_acq:
  case clang::AArch64::BI_InterlockedExchange16_acq:
  case clang::AArch64::BI_InterlockedExchange_acq:
  case clang::AArch64::BI_InterlockedExchange64_acq:
    return MSVCIntrin::_InterlockedExchange_acq;
  case clang::AArch64::BI_InterlockedExchange8_rel:
  case clang::AArch64::BI_InterlockedExchange16_rel:
  case clang::AArch64::BI_InterlockedExchange_rel:
  case clang::AArch64::BI_InterlockedExchange64_rel:
    return MSVCIntrin::_InterlockedExchange_rel;
  case clang::AArch64::BI_InterlockedExchange8_nf:
  case clang::AArch64::BI_InterlockedExchange16_nf:
  case clang::AArch64::BI_InterlockedExchange_nf:
  case clang::AArch64::BI_InterlockedExchange64_nf:
    return MSVCIntrin::_InterlockedExchange_nf;
  case clang::AArch64::BI_InterlockedCompareExchange8_acq:
  case clang::AArch64::BI_InterlockedCompareExchange16_acq:
  case clang::AArch64::BI_InterlockedCompareExchange_acq:
  case clang::AArch64::BI_InterlockedCompareExchange64_acq:
    return MSVCIntrin::_InterlockedCompareExchange_acq;
  case clang::AArch64::BI_InterlockedCompareExchange8_rel:
  case clang::AArch64::BI_InterlockedCompareExchange16_rel:
  case clang::AArch64::BI_InterlockedCompareExchange_rel:
  case clang::AArch64::BI_InterlockedCompareExchange64_rel:
    return MSVCIntrin::_InterlockedCompareExchange_rel;
  case clang::AArch64::BI_InterlockedCompareExchange8_nf:
  case clang::AArch64::BI_InterlockedCompareExchange16_nf:
  case clang::AArch64::BI_InterlockedCompareExchange_nf:
  case clang::AArch64::BI_InterlockedCompareExchange64_nf:
    return MSVCIntrin::_InterlockedCompareExchange_nf;
  case clang::AArch64::BI_InterlockedCompareExchange128:
    return MSVCIntrin::_InterlockedCompareExchange128;
  case clang::AArch64::BI_InterlockedCompareExchange128_acq:
    return MSVCIntrin::_InterlockedCompareExchange128_acq;
  case clang::AArch64::BI_InterlockedCompareExchange128_nf:
    return MSVCIntrin::_InterlockedCompareExchange128_nf;
  case clang::AArch64::BI_InterlockedCompareExchange128_rel:
    return MSVCIntrin::_InterlockedCompareExchange128_rel;
  case clang::AArch64::BI_InterlockedOr8_acq:
  case clang::AArch64::BI_InterlockedOr16_acq:
  case clang::AArch64::BI_InterlockedOr_acq:
  case clang::AArch64::BI_InterlockedOr64_acq:
    return MSVCIntrin::_InterlockedOr_acq;
  case clang::AArch64::BI_InterlockedOr8_rel:
  case clang::AArch64::BI_InterlockedOr16_rel:
  case clang::AArch64::BI_InterlockedOr_rel:
  case clang::AArch64::BI_InterlockedOr64_rel:
    return MSVCIntrin::_InterlockedOr_rel;
  case clang::AArch64::BI_InterlockedOr8_nf:
  case clang::AArch64::BI_InterlockedOr16_nf:
  case clang::AArch64::BI_InterlockedOr_nf:
  case clang::AArch64::BI_InterlockedOr64_nf:
    return MSVCIntrin::_InterlockedOr_nf;
  case clang::AArch64::BI_InterlockedXor8_acq:
  case clang::AArch64::BI_InterlockedXor16_acq:
  case clang::AArch64::BI_InterlockedXor_acq:
  case clang::AArch64::BI_InterlockedXor64_acq:
    return MSVCIntrin::_InterlockedXor_acq;
  case clang::AArch64::BI_InterlockedXor8_rel:
  case clang::AArch64::BI_InterlockedXor16_rel:
  case clang::AArch64::BI_InterlockedXor_rel:
  case clang::AArch64::BI_InterlockedXor64_rel:
    return MSVCIntrin::_InterlockedXor_rel;
  case clang::AArch64::BI_InterlockedXor8_nf:
  case clang::AArch64::BI_InterlockedXor16_nf:
  case clang::AArch64::BI_InterlockedXor_nf:
  case clang::AArch64::BI_InterlockedXor64_nf:
    return MSVCIntrin::_InterlockedXor_nf;
  case clang::AArch64::BI_InterlockedAnd8_acq:
  case clang::AArch64::BI_InterlockedAnd16_acq:
  case clang::AArch64::BI_InterlockedAnd_acq:
  case clang::AArch64::BI_InterlockedAnd64_acq:
    return MSVCIntrin::_InterlockedAnd_acq;
  case clang::AArch64::BI_InterlockedAnd8_rel:
  case clang::AArch64::BI_InterlockedAnd16_rel:
  case clang::AArch64::BI_InterlockedAnd_rel:
  case clang::AArch64::BI_InterlockedAnd64_rel:
    return MSVCIntrin::_InterlockedAnd_rel;
  case clang::AArch64::BI_InterlockedAnd8_nf:
  case clang::AArch64::BI_InterlockedAnd16_nf:
  case clang::AArch64::BI_InterlockedAnd_nf:
  case clang::AArch64::BI_InterlockedAnd64_nf:
    return MSVCIntrin::_InterlockedAnd_nf;
  case clang::AArch64::BI_InterlockedIncrement16_acq:
  case clang::AArch64::BI_InterlockedIncrement_acq:
  case clang::AArch64::BI_InterlockedIncrement64_acq:
    return MSVCIntrin::_InterlockedIncrement_acq;
  case clang::AArch64::BI_InterlockedIncrement16_rel:
  case clang::AArch64::BI_InterlockedIncrement_rel:
  case clang::AArch64::BI_InterlockedIncrement64_rel:
    return MSVCIntrin::_InterlockedIncrement_rel;
  case clang::AArch64::BI_InterlockedIncrement16_nf:
  case clang::AArch64::BI_InterlockedIncrement_nf:
  case clang::AArch64::BI_InterlockedIncrement64_nf:
    return MSVCIntrin::_InterlockedIncrement_nf;
  case clang::AArch64::BI_InterlockedDecrement16_acq:
  case clang::AArch64::BI_InterlockedDecrement_acq:
  case clang::AArch64::BI_InterlockedDecrement64_acq:
    return MSVCIntrin::_InterlockedDecrement_acq;
  case clang::AArch64::BI_InterlockedDecrement16_rel:
  case clang::AArch64::BI_InterlockedDecrement_rel:
  case clang::AArch64::BI_InterlockedDecrement64_rel:
    return MSVCIntrin::_InterlockedDecrement_rel;
  case clang::AArch64::BI_InterlockedDecrement16_nf:
  case clang::AArch64::BI_InterlockedDecrement_nf:
  case clang::AArch64::BI_InterlockedDecrement64_nf:
    return MSVCIntrin::_InterlockedDecrement_nf;
  }
  llvm_unreachable("must return from switch");
}
1540
1541static std::optional<CodeGenFunction::MSVCIntrin>
1542translateX86ToMsvcIntrin(unsigned BuiltinID) {
1543 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1544 switch (BuiltinID) {
1545 default:
1546 return std::nullopt;
1547 case clang::X86::BI_BitScanForward:
1548 case clang::X86::BI_BitScanForward64:
1549 return MSVCIntrin::_BitScanForward;
1550 case clang::X86::BI_BitScanReverse:
1551 case clang::X86::BI_BitScanReverse64:
1552 return MSVCIntrin::_BitScanReverse;
1553 case clang::X86::BI_InterlockedAnd64:
1554 return MSVCIntrin::_InterlockedAnd;
1555 case clang::X86::BI_InterlockedCompareExchange128:
1556 return MSVCIntrin::_InterlockedCompareExchange128;
1557 case clang::X86::BI_InterlockedExchange64:
1558 return MSVCIntrin::_InterlockedExchange;
1559 case clang::X86::BI_InterlockedExchangeAdd64:
1560 return MSVCIntrin::_InterlockedExchangeAdd;
1561 case clang::X86::BI_InterlockedExchangeSub64:
1562 return MSVCIntrin::_InterlockedExchangeSub;
1563 case clang::X86::BI_InterlockedOr64:
1564 return MSVCIntrin::_InterlockedOr;
1565 case clang::X86::BI_InterlockedXor64:
1566 return MSVCIntrin::_InterlockedXor;
1567 case clang::X86::BI_InterlockedDecrement64:
1568 return MSVCIntrin::_InterlockedDecrement;
1569 case clang::X86::BI_InterlockedIncrement64:
1570 return MSVCIntrin::_InterlockedIncrement;
1571 }
1572 llvm_unreachable("must return from switch");
1573}
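// Example: on x86, _InterlockedOr64(&v, m) maps to MSVCIntrin::_InterlockedOr
// above and lowers to roughly `atomicrmw or ptr %v, i64 %m seq_cst`, yielding
// the value held in memory before the operation.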
1574
1575// Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
1576Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
1577 const CallExpr *E) {
1578 switch (BuiltinID) {
1579 case MSVCIntrin::_BitScanForward:
1580 case MSVCIntrin::_BitScanReverse: {
1581 Address IndexAddress(EmitPointerWithAlignment(E->getArg(0)));
1582 Value *ArgValue = EmitScalarExpr(E->getArg(1));
1583
1584 llvm::Type *ArgType = ArgValue->getType();
1585 llvm::Type *IndexType = IndexAddress.getElementType();
1586 llvm::Type *ResultType = ConvertType(E->getType());
1587
1588 Value *ArgZero = llvm::Constant::getNullValue(ArgType);
1589 Value *ResZero = llvm::Constant::getNullValue(ResultType);
1590 Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
1591
1592 BasicBlock *Begin = Builder.GetInsertBlock();
1593 BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
1594 Builder.SetInsertPoint(End);
1595 PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
1596
1597 Builder.SetInsertPoint(Begin);
1598 Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
1599 BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
1600 Builder.CreateCondBr(IsZero, End, NotZero);
1601 Result->addIncoming(ResZero, Begin);
1602
1603 Builder.SetInsertPoint(NotZero);
1604
1605 if (BuiltinID == MSVCIntrin::_BitScanForward) {
1606 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1607 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
1608 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
1609 Builder.CreateStore(ZeroCount, IndexAddress, false);
1610 } else {
1611 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
1612 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
1613
1614 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1615 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
1616 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
1617 Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
1618 Builder.CreateStore(Index, IndexAddress, false);
1619 }
1620 Builder.CreateBr(End);
1621 Result->addIncoming(ResOne, NotZero);
1622
1623 Builder.SetInsertPoint(End);
1624 return Result;
1625 }
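// Sketch of the control flow emitted above for, e.g., _BitScanForward(&Idx, Mask):
//
//   entry:    %iszero = icmp eq i32 %Mask, 0
//             br i1 %iszero, label %bitscan_end, label %bitscan_not_zero
//   not_zero: store the cttz of %Mask to *Idx; br label %bitscan_end
//   end:      %res = phi i8 [ 0, %entry ], [ 1, %not_zero ]
//
// i.e. the return value is 0 when Mask is zero (leaving *Idx unwritten) and 1
// otherwise.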
1626 case MSVCIntrin::_InterlockedAnd:
1627 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
1628 case MSVCIntrin::_InterlockedExchange:
1629 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
1630 case MSVCIntrin::_InterlockedExchangeAdd:
1631 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
1632 case MSVCIntrin::_InterlockedExchangeSub:
1633 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
1634 case MSVCIntrin::_InterlockedOr:
1635 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
1636 case MSVCIntrin::_InterlockedXor:
1637 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
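// The unsuffixed Interlocked* forms above use the default sequentially
// consistent ordering; e.g. _InterlockedXor(&x, m) becomes roughly
// `atomicrmw xor ptr %x, i32 %m seq_cst` and returns the old value.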
1638 case MSVCIntrin::_InterlockedExchangeAdd_acq:
1639 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1640 AtomicOrdering::Acquire);
1641 case MSVCIntrin::_InterlockedExchangeAdd_rel:
1642 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1643 AtomicOrdering::Release);
1644 case MSVCIntrin::_InterlockedExchangeAdd_nf:
1645 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1646 AtomicOrdering::Monotonic);
1647 case MSVCIntrin::_InterlockedExchange_acq:
1648 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1649 AtomicOrdering::Acquire);
1650 case MSVCIntrin::_InterlockedExchange_rel:
1651 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1652 AtomicOrdering::Release);
1653 case MSVCIntrin::_InterlockedExchange_nf:
1654 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1655 AtomicOrdering::Monotonic);
1656 case MSVCIntrin::_InterlockedCompareExchange_acq:
1657 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
1658 case MSVCIntrin::_InterlockedCompareExchange_rel:
1659 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
1660 case MSVCIntrin::_InterlockedCompareExchange_nf:
1661 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
1662 case MSVCIntrin::_InterlockedCompareExchange128:
1663 return EmitAtomicCmpXchg128ForMSIntrin(
1664 *this, E, AtomicOrdering::SequentiallyConsistent);
1665 case MSVCIntrin::_InterlockedCompareExchange128_acq:
1666 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
1667 case MSVCIntrin::_InterlockedCompareExchange128_rel:
1668 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
1669 case MSVCIntrin::_InterlockedCompareExchange128_nf:
1670 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
1671 case MSVCIntrin::_InterlockedOr_acq:
1672 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1673 AtomicOrdering::Acquire);
1674 case MSVCIntrin::_InterlockedOr_rel:
1675 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1676 AtomicOrdering::Release);
1677 case MSVCIntrin::_InterlockedOr_nf:
1678 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1679 AtomicOrdering::Monotonic);
1680 case MSVCIntrin::_InterlockedXor_acq:
1681 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1682 AtomicOrdering::Acquire);
1683 case MSVCIntrin::_InterlockedXor_rel:
1684 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1685 AtomicOrdering::Release);
1686 case MSVCIntrin::_InterlockedXor_nf:
1687 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1688 AtomicOrdering::Monotonic);
1689 case MSVCIntrin::_InterlockedAnd_acq:
1690 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1691 AtomicOrdering::Acquire);
1692 case MSVCIntrin::_InterlockedAnd_rel:
1693 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1694 AtomicOrdering::Release);
1695 case MSVCIntrin::_InterlockedAnd_nf:
1696 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1697 AtomicOrdering::Monotonic);
1698 case MSVCIntrin::_InterlockedIncrement_acq:
1699 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
1700 case MSVCIntrin::_InterlockedIncrement_rel:
1701 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
1702 case MSVCIntrin::_InterlockedIncrement_nf:
1703 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
1704 case MSVCIntrin::_InterlockedDecrement_acq:
1705 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
1706 case MSVCIntrin::_InterlockedDecrement_rel:
1707 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
1708 case MSVCIntrin::_InterlockedDecrement_nf:
1709 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
1710
1711 case MSVCIntrin::_InterlockedDecrement:
1712 return EmitAtomicDecrementValue(*this, E);
1713 case MSVCIntrin::_InterlockedIncrement:
1714 return EmitAtomicIncrementValue(*this, E);
1715
1716 case MSVCIntrin::__fastfail: {
1717 // Request immediate process termination from the kernel. The instruction
1718 // sequences to do this are documented on MSDN:
1719 // https://msdn.microsoft.com/en-us/library/dn774154.aspx
1720 llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
1721 StringRef Asm, Constraints;
1722 switch (ISA) {
1723 default:
1724 ErrorUnsupported(E, "__fastfail call for this architecture");
1725 break;
1726 case llvm::Triple::x86:
1727 case llvm::Triple::x86_64:
1728 Asm = "int $$0x29";
1729 Constraints = "{cx}";
1730 break;
1731 case llvm::Triple::thumb:
1732 Asm = "udf #251";
1733 Constraints = "{r0}";
1734 break;
1735 case llvm::Triple::aarch64:
1736 Asm = "brk #0xF003";
1737 Constraints = "{w0}";
1738 }
1739 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
1740 llvm::InlineAsm *IA =
1741 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1742 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
1743 getLLVMContext(), llvm::AttributeList::FunctionIndex,
1744 llvm::Attribute::NoReturn);
1745 llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
1746 CI->setAttributes(NoReturnAttr);
1747 return CI;
1748 }
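// Example: __fastfail(42) on AArch64 emits roughly
//   call void asm sideeffect "brk #0xF003", "{w0}"(i32 42)
// with the noreturn attribute, so control never returns to the caller.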
1749 }
1750 llvm_unreachable("Incorrect MSVC intrinsic!");
1751}
1752
1753namespace {
1754// ARC cleanup for __builtin_os_log_format
1755struct CallObjCArcUse final : EHScopeStack::Cleanup {
1756 CallObjCArcUse(llvm::Value *object) : object(object) {}
1757 llvm::Value *object;
1758
1759 void Emit(CodeGenFunction &CGF, Flags flags) override {
1760 CGF.EmitARCIntrinsicUse(object);
1761 }
1762};
1763}
1764
1765 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
1766 BuiltinCheckKind Kind) {
1767 assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
1768 && "Unsupported builtin check kind");
1769
1770 Value *ArgValue = EmitScalarExpr(E);
1771 if (!SanOpts.has(SanitizerKind::Builtin))
1772 return ArgValue;
1773
1774 SanitizerScope SanScope(this);
1775 Value *Cond = Builder.CreateICmpNE(
1776 ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
1777 EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
1778 SanitizerHandler::InvalidBuiltin,
1779 {EmitCheckSourceLocation(E->getExprLoc()),
1780 llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
1781 std::nullopt);
1782 return ArgValue;
1783}
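// Example: under -fsanitize=builtin, __builtin_clz(0) fails the icmp ne
// check above and is diagnosed through the invalid-builtin sanitizer handler
// instead of silently producing an unspecified ctlz result.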
1784
1785/// Get the argument type for arguments to os_log_helper.
1786 static CanQualType getOSLogArgType(ASTContext &C, int Size) {
1787 QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
1788 return C.getCanonicalType(UnsignedTy);
1789}
1790
1791 llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
1792 const analyze_os_log::OSLogBufferLayout &Layout,
1793 CharUnits BufferAlignment) {
1794 ASTContext &Ctx = getContext();
1795
1796 llvm::SmallString<64> Name;
1797 {
1798 raw_svector_ostream OS(Name);
1799 OS << "__os_log_helper";
1800 OS << "_" << BufferAlignment.getQuantity();
1801 OS << "_" << int(Layout.getSummaryByte());
1802 OS << "_" << int(Layout.getNumArgsByte());
1803 for (const auto &Item : Layout.Items)
1804 OS << "_" << int(Item.getSizeByte()) << "_"
1805 << int(Item.getDescriptorByte());
1806 }
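// Example: under the scheme above, a layout with buffer alignment 8, summary
// byte 2, and a single 4-byte item with descriptor 0 would mangle to the name
// "__os_log_helper_8_2_1_4_0".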
1807
1808 if (llvm::Function *F = CGM.getModule().getFunction(Name))
1809 return F;
1810
1811 llvm::SmallVector<QualType, 4> ArgTys;
1812 FunctionArgList Args;
1813 Args.push_back(ImplicitParamDecl::Create(
1814 Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
1815 ImplicitParamDecl::Other));
1816 ArgTys.emplace_back(Ctx.VoidPtrTy);
1817
1818 for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
1819 char Size = Layout.Items[I].getSizeByte();
1820 if (!Size)
1821 continue;
1822
1823 QualType ArgTy = getOSLogArgType(Ctx, Size);
1824 Args.push_back(ImplicitParamDecl::Create(
1825 Ctx, nullptr, SourceLocation(),
1826 &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
1827 ImplicitParamDecl::Other));
1828 ArgTys.emplace_back(ArgTy);
1829 }
1830
1831 QualType ReturnTy = Ctx.VoidTy;
1832
1833 // The helper function has linkonce_odr linkage to enable the linker to merge
1834 // identical functions. To ensure the merging always happens, 'noinline' is
1835 // attached to the function when compiling with -Oz.
1836 const CGFunctionInfo &FI =
1837 CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
1838 llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
1839 llvm::Function *Fn = llvm::Function::Create(
1840 FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
1841 Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
1842 CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false);
1843 CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
1844 Fn->setDoesNotThrow();
1845
1846 // Attach 'noinline' at -Oz.
1847 if (CGM.getCodeGenOpts().OptimizeSize == 2)
1848 Fn->addFnAttr(llvm::Attribute::NoInline);
1849
1850 auto NL = ApplyDebugLocation::CreateEmpty(*this);
1851 StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args);
1852
1853 // Create a scope with an artificial location for the body of this function.
1854 auto AL = ApplyDebugLocation::CreateArtificial(*this);
1855
1856 CharUnits Offset;
1857 Address BufAddr =
1858 Address(Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Int8Ty,
1859 BufferAlignment);
1860 Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
1861 Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
1862 Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
1863 Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
1864
1865 unsigned I = 1;
1866 for (const auto &Item : Layout.Items) {
1867 Builder.CreateStore(
1868 Builder.getInt8(Item.getDescriptorByte()),
1869 Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
1870 Builder.CreateStore(
1871 Builder.getInt8(Item.getSizeByte()),
1872 Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
1873
1874 CharUnits Size = Item.size();
1875 if (!Size.getQuantity())
1876 continue;
1877
1878 Address Arg = GetAddrOfLocalVar(Args[I]);
1879 Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
1880 Addr = Addr.withElementType(Arg.getElementType());
1881 Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
1882 Offset += Size;
1883 ++I;
1884 }
1885
1886 FinishFunction();
1887
1888 return Fn;
1889}
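// The generated helper serializes the buffer as
//   [summary][numArgs]([descriptor][size][payload])*
// storing each nonempty argument's payload at the running byte Offset.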
1890
1891 RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
1892 assert(E.getNumArgs() >= 2 &&
1893 "__builtin_os_log_format takes at least 2 arguments");
1894 ASTContext &Ctx = getContext();
1895 analyze_os_log::OSLogBufferLayout Layout;
1896 analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
1897 Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
1898 llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
1899
1900 // Ignore argument 1, the format string. It is not currently used.
1901 CallArgList Args;
1902 Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy);
1903
1904 for (const auto &Item : Layout.Items) {
1905 int Size = Item.getSizeByte();
1906 if (!Size)
1907 continue;
1908
1909 llvm::Value *ArgVal;
1910
1911 if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
1912 uint64_t Val = 0;
1913 for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
1914 Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
1915 ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
1916 } else if (const Expr *TheExpr = Item.getExpr()) {
1917 ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
1918
1919 // If a temporary object that requires destruction after the full
1920 // expression is passed, push a lifetime-extended cleanup to extend its
1921 // lifetime to the end of the enclosing block scope.
1922 auto LifetimeExtendObject = [&](const Expr *E) {
1923 E = E->IgnoreParenCasts();
1924 // Extend lifetimes of objects returned by function calls and message
1925 // sends.
1926
1927 // FIXME: We should do this in other cases in which temporaries are
1928 // created including arguments of non-ARC types (e.g., C++
1929 // temporaries).
1930 if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))
1931 return true;
1932 return false;
1933 };
1934
1935 if (TheExpr->getType()->isObjCRetainableType() &&
1936 getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
1937 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
1938 "Only scalar can be a ObjC retainable type");
1939 if (!isa<Constant>(ArgVal)) {
1940 CleanupKind Cleanup = getARCCleanupKind();
1941 QualType Ty = TheExpr->getType();
1942 Address Alloca = Address::invalid();
1943 Address Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca);
1944 ArgVal = EmitARCRetain(Ty, ArgVal);
1945 Builder.CreateStore(ArgVal, Addr);
1946 pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty,
1947 CodeGenFunction::destroyARCStrongPrecise,
1948 Cleanup & EHCleanup);
1949
1950 // Push a clang.arc.use call to ensure ARC optimizer knows that the
1951 // argument has to be alive.
1952 if (CGM.getCodeGenOpts().OptimizationLevel != 0)
1953 pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);
1954 }
1955 }
1956 } else {
1957 ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
1958 }
1959
1960 unsigned ArgValSize =
1961 CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
1962 llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
1963 ArgValSize);
1964 ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
1965 CanQualType ArgTy = getOSLogArgType(Ctx, Size);
1966 // If ArgVal has type x86_fp80, zero-extend ArgVal.
1967 ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
1968 Args.add(RValue::get(ArgVal), ArgTy);
1969 }
1970
1971 const CGFunctionInfo &FI =
1972 CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
1973 llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
1974 Layout, BufAddr.getAlignment());
1975 EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
1976 return RValue::get(BufAddr.getPointer());
1977}
1978
1979 static bool isSpecialUnsignedMultiplySignedResult(
1980 unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
1981 WidthAndSignedness ResultInfo) {
1982 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
1983 Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
1984 !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
1985}
1986
1987 static RValue EmitCheckedUnsignedMultiplySignedResult(
1988 CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,
1989 const clang::Expr *Op2, WidthAndSignedness Op2Info,
1990 const clang::Expr *ResultArg, QualType ResultQTy,
1991 WidthAndSignedness ResultInfo) {
1992 assert(isSpecialUnsignedMultiplySignedResult(
1993 Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
1994 "Cannot specialize this multiply");
1995
1996 llvm::Value *V1 = CGF.EmitScalarExpr(Op1);
1997 llvm::Value *V2 = CGF.EmitScalarExpr(Op2);
1998
1999 llvm::Value *HasOverflow;
2000 llvm::Value *Result = EmitOverflowIntrinsic(
2001 CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);
2002
2003 // The intrinsic call will detect overflow when the value is > UINT_MAX;
2004 // however, since the original builtin had a signed result, we need to report
2005 // an overflow when the result is greater than INT_MAX.
2006 auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);
2007 llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax);
2008
2009 llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue);
2010 HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow);
2011
2012 bool isVolatile =
2013 ResultArg->getType()->getPointeeType().isVolatileQualified();
2014 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2015 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2016 isVolatile);
2017 return RValue::get(HasOverflow);
2018}
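// Worked example (32-bit): __builtin_mul_overflow(0x60000000u, 2u, &si) gives
// an unsigned product of 0xC0000000, which umul.with.overflow does not flag,
// but since 0xC0000000 > INT_MAX the IntMaxOverflow check above reports
// overflow for the signed result.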
2019
2020/// Determine if a binop is a checked mixed-sign multiply we can specialize.
2021static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
2022 WidthAndSignedness Op1Info,
2023 WidthAndSignedness Op2Info,
2024 WidthAndSignedness ResultInfo) {
2025 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2026 std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
2027 Op1Info.Signed != Op2Info.Signed;
2028}
2029
2030/// Emit a checked mixed-sign multiply. This is a cheaper specialization of
2031/// the generic checked-binop irgen.
2032static RValue
2033 EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
2034 WidthAndSignedness Op1Info, const clang::Expr *Op2,
2035 WidthAndSignedness Op2Info,
2036 const clang::Expr *ResultArg, QualType ResultQTy,
2037 WidthAndSignedness ResultInfo) {
2038 assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
2039 Op2Info, ResultInfo) &&
2040 "Not a mixed-sign multipliction we can specialize");
2041
2042 // Emit the signed and unsigned operands.
2043 const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
2044 const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
2045 llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
2046 llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
2047 unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
2048 unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
2049
2050 // One of the operands may be smaller than the other. If so, [s|z]ext it.
2051 if (SignedOpWidth < UnsignedOpWidth)
2052 Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
2053 if (UnsignedOpWidth < SignedOpWidth)
2054 Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
2055
2056 llvm::Type *OpTy = Signed->getType();
2057 llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
2058 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2059 llvm::Type *ResTy = ResultPtr.getElementType();
2060 unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
2061
2062 // Take the absolute value of the signed operand.
2063 llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
2064 llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
2065 llvm::Value *AbsSigned =
2066 CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
2067
2068 // Perform a checked unsigned multiplication.
2069 llvm::Value *UnsignedOverflow;
2070 llvm::Value *UnsignedResult =
2071 EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
2072 Unsigned, UnsignedOverflow);
2073
2074 llvm::Value *Overflow, *Result;
2075 if (ResultInfo.Signed) {
2076 // Signed overflow occurs if the result is greater than INT_MAX or less
2077 // than INT_MIN, i.e., when |Result| > (INT_MAX + IsNegative).
2078 auto IntMax =
2079 llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth);
2080 llvm::Value *MaxResult =
2081 CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
2082 CGF.Builder.CreateZExt(IsNegative, OpTy));
2083 llvm::Value *SignedOverflow =
2084 CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
2085 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
2086
2087 // Prepare the signed result (possibly by negating it).
2088 llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
2089 llvm::Value *SignedResult =
2090 CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
2091 Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
2092 } else {
2093 // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
2094 llvm::Value *Underflow = CGF.Builder.CreateAnd(
2095 IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
2096 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
2097 if (ResultInfo.Width < OpWidth) {
2098 auto IntMax =
2099 llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
2100 llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
2101 UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
2102 Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
2103 }
2104
2105 // Negate the product if it would be negative in infinite precision.
2106 Result = CGF.Builder.CreateSelect(
2107 IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
2108
2109 Result = CGF.Builder.CreateTrunc(Result, ResTy);
2110 }
2111 assert(Overflow && Result && "Missing overflow or result");
2112
2113 bool isVolatile =
2114 ResultArg->getType()->getPointeeType().isVolatileQualified();
2115 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2116 isVolatile);
2117 return RValue::get(Overflow);
2118}
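// Worked example (int a = -3, unsigned b = 5, int *res): IsNegative is true,
// AbsSigned = 3, the unsigned multiply yields 15 with no overflow, 15 does not
// exceed INT_MAX + 1, so the result is negated back to -15 and the builtin
// returns false.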
2119
2120static bool
2121 TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
2122 llvm::SmallPtrSetImpl<const Decl *> &Seen) {
2123 if (const auto *Arr = Ctx.getAsArrayType(Ty))
2124 Ty = Ctx.getBaseElementType(Arr);
2125
2126 const auto *Record = Ty->getAsCXXRecordDecl();
2127 if (!Record)
2128 return false;
2129
2130 // We've already checked this type, or are in the process of checking it.
2131 if (!Seen.insert(Record).second)
2132 return false;
2133
2134 assert(Record->hasDefinition() &&
2135 "Incomplete types should already be diagnosed");
2136
2137 if (Record->isDynamicClass())
2138 return true;
2139
2140 for (FieldDecl *F : Record->fields()) {
2141 if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
2142 return true;
2143 }
2144 return false;
2145}
2146
2147/// Determine if the specified type requires laundering by checking if it is a
2148/// dynamic class type or contains a subobject which is a dynamic class type.
2149 static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
2150 if (!CGM.getCodeGenOpts().StrictVTablePointers)
2151 return false;
2152 llvm::SmallPtrSet<const Decl *, 16> Seen;
2153 return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
2154}
2155
2156RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
2157 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
2158 llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
2159
2160 // The builtin's shift arg may have a different type than the source arg and
2161 // result, but the LLVM intrinsic uses the same type for all values.
2162 llvm::Type *Ty = Src->getType();
2163 ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
2164
2165 // Rotate is a special case of LLVM funnel shift - the first two args are the same.
2166 unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2167 Function *F = CGM.getIntrinsic(IID, Ty);
2168 return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
2169}
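// Example: a left rotate is fshl(x, x, s), so __builtin_rotateleft8(0x12, 4)
// evaluates to 0x21; the corresponding right rotate uses fshr.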
2170
2171// Map math builtins for long-double to f128 version.
2172static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
2173 switch (BuiltinID) {
2174#define MUTATE_LDBL(func) \
2175 case Builtin::BI__builtin_##func##l: \
2176 return Builtin::BI__builtin_##func##f128;
2207 MUTATE_LDBL(nans)
2208 MUTATE_LDBL(inf)
2227 MUTATE_LDBL(huge_val)
2237#undef MUTATE_LDBL
2238 default:
2239 return BuiltinID;
2240 }
2241}
2242
2243static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
2244 Value *V) {
2245 if (CGF.Builder.getIsFPConstrained() &&
2246 CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) {
2247 if (Value *Result =
2248 CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM))
2249 return Result;
2250 }
2251 return nullptr;
2252}
2253
2254RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
2255 const CallExpr *E,
2256 ReturnValueSlot ReturnValue) {
2257 const FunctionDecl *FD = GD.getDecl()->getAsFunction();
2258 // See if we can constant fold this builtin. If so, don't emit it at all.
2259 // TODO: Extend this handling to all builtin calls that we can constant-fold.
2260 Expr::EvalResult Result;
2261 if (E->isPRValue() && E->EvaluateAsRValue(Result, CGM.getContext()) &&
2262 !Result.hasSideEffects()) {
2263 if (Result.Val.isInt())
2264 return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
2265 Result.Val.getInt()));
2266 if (Result.Val.isFloat())
2267 return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
2268 Result.Val.getFloat()));
2269 }
2270
2271 // If the current long-double semantics are IEEE 128-bit, replace math builtins
2272 // of long-double with their f128 equivalents.
2273 // TODO: This mutation should also be applied to targets other than PPC,
2274 // once the backend supports IEEE 128-bit style libcalls.
2275 if (getTarget().getTriple().isPPC64() &&
2276 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
2277 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
2278
2279 // If the builtin has been declared explicitly with an assembler label,
2280 // disable the specialized emission below. Ideally we should communicate the
2281 // rename in IR, or at least avoid generating the intrinsic calls that are
2282 // likely to get lowered to the renamed library functions.
2283 const unsigned BuiltinIDIfNoAsmLabel =
2284 FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
2285
2286 // There are LLVM math intrinsics/instructions corresponding to math library
2287 // functions, except that the LLVM op will never set errno while the math
2288 // library might. Also, math builtins have the same semantics as their math library
2289 // twins. Thus, we can transform math library and builtin calls to their
2290 // LLVM counterparts if the call is marked 'const' (known to never set errno).
2291 // In case FP exceptions are enabled, the experimental versions of the
2292 // intrinsics model those.
2293 bool ConstWithoutErrnoAndExceptions =
2294 getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
2295 bool ConstWithoutExceptions =
2296 getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID);
2297 if (FD->hasAttr<ConstAttr>() ||
2298 ((ConstWithoutErrnoAndExceptions || ConstWithoutExceptions) &&
2299 (!ConstWithoutErrnoAndExceptions || (!getLangOpts().MathErrno)))) {
2300 switch (BuiltinIDIfNoAsmLabel) {
2301 case Builtin::BIceil:
2302 case Builtin::BIceilf:
2303 case Builtin::BIceill:
2304 case Builtin::BI__builtin_ceil:
2305 case Builtin::BI__builtin_ceilf:
2306 case Builtin::BI__builtin_ceilf16:
2307 case Builtin::BI__builtin_ceill:
2308 case Builtin::BI__builtin_ceilf128:
2309 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2310 Intrinsic::ceil,
2311 Intrinsic::experimental_constrained_ceil));
2312
2313 case Builtin::BIcopysign:
2314 case Builtin::BIcopysignf:
2315 case Builtin::BIcopysignl:
2316 case Builtin::BI__builtin_copysign:
2317 case Builtin::BI__builtin_copysignf:
2318 case Builtin::BI__builtin_copysignf16:
2319 case Builtin::BI__builtin_copysignl:
2320 case Builtin::BI__builtin_copysignf128:
2321 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
2322
2323 case Builtin::BIcos:
2324 case Builtin::BIcosf:
2325 case Builtin::BIcosl:
2326 case Builtin::BI__builtin_cos:
2327 case Builtin::BI__builtin_cosf:
2328 case Builtin::BI__builtin_cosf16:
2329 case Builtin::BI__builtin_cosl:
2330 case Builtin::BI__builtin_cosf128:
2331 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2332 Intrinsic::cos,
2333 Intrinsic::experimental_constrained_cos));
2334
2335 case Builtin::BIexp:
2336 case Builtin::BIexpf:
2337 case Builtin::BIexpl:
2338 case Builtin::BI__builtin_exp:
2339 case Builtin::BI__builtin_expf:
2340 case Builtin::BI__builtin_expf16:
2341 case Builtin::BI__builtin_expl:
2342 case Builtin::BI__builtin_expf128:
2343 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2344 Intrinsic::exp,
2345 Intrinsic::experimental_constrained_exp));
2346
2347 case Builtin::BIexp2:
2348 case Builtin::BIexp2f:
2349 case Builtin::BIexp2l:
2350 case Builtin::BI__builtin_exp2:
2351 case Builtin::BI__builtin_exp2f:
2352 case Builtin::BI__builtin_exp2f16:
2353 case Builtin::BI__builtin_exp2l:
2354 case Builtin::BI__builtin_exp2f128:
2355 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2356 Intrinsic::exp2,
2357 Intrinsic::experimental_constrained_exp2));
2358
2359 case Builtin::BIfabs:
2360 case Builtin::BIfabsf:
2361 case Builtin::BIfabsl:
2362 case Builtin::BI__builtin_fabs:
2363 case Builtin::BI__builtin_fabsf:
2364 case Builtin::BI__builtin_fabsf16:
2365 case Builtin::BI__builtin_fabsl:
2366 case Builtin::BI__builtin_fabsf128:
2367 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
2368
2369 case Builtin::BIfloor:
2370 case Builtin::BIfloorf:
2371 case Builtin::BIfloorl:
2372 case Builtin::BI__builtin_floor:
2373 case Builtin::BI__builtin_floorf:
2374 case Builtin::BI__builtin_floorf16:
2375 case Builtin::BI__builtin_floorl:
2376 case Builtin::BI__builtin_floorf128:
2377 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2378 Intrinsic::floor,
2379 Intrinsic::experimental_constrained_floor));
2380
2381 case Builtin::BIfma:
2382 case Builtin::BIfmaf:
2383 case Builtin::BIfmal:
2384 case Builtin::BI__builtin_fma:
2385 case Builtin::BI__builtin_fmaf:
2386 case Builtin::BI__builtin_fmaf16:
2387 case Builtin::BI__builtin_fmal:
2388 case Builtin::BI__builtin_fmaf128:
2389 return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,
2390 Intrinsic::fma,
2391 Intrinsic::experimental_constrained_fma));
2392
2393 case Builtin::BIfmax:
2394 case Builtin::BIfmaxf:
2395 case Builtin::BIfmaxl:
2396 case Builtin::BI__builtin_fmax:
2397 case Builtin::BI__builtin_fmaxf:
2398 case Builtin::BI__builtin_fmaxf16:
2399 case Builtin::BI__builtin_fmaxl:
2400 case Builtin::BI__builtin_fmaxf128:
2401 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
2402 Intrinsic::maxnum,
2403 Intrinsic::experimental_constrained_maxnum));
2404
2405 case Builtin::BIfmin:
2406 case Builtin::BIfminf:
2407 case Builtin::BIfminl:
2408 case Builtin::BI__builtin_fmin:
2409 case Builtin::BI__builtin_fminf:
2410 case Builtin::BI__builtin_fminf16:
2411 case Builtin::BI__builtin_fminl:
2412 case Builtin::BI__builtin_fminf128:
2413 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
2414 Intrinsic::minnum,
2415 Intrinsic::experimental_constrained_minnum));
2416
2417 // fmod() is a special-case. It maps to the frem instruction rather than an
2418 // LLVM intrinsic.
2419 case Builtin::BIfmod:
2420 case Builtin::BIfmodf:
2421 case Builtin::BIfmodl:
2422 case Builtin::BI__builtin_fmod:
2423 case Builtin::BI__builtin_fmodf:
2424 case Builtin::BI__builtin_fmodf16:
2425 case Builtin::BI__builtin_fmodl:
2426 case Builtin::BI__builtin_fmodf128: {
2427 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2428 Value *Arg1 = EmitScalarExpr(E->getArg(0));
2429 Value *Arg2 = EmitScalarExpr(E->getArg(1));
2430 return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
2431 }
2432
2433 case Builtin::BIlog:
2434 case Builtin::BIlogf:
2435 case Builtin::BIlogl:
2436 case Builtin::BI__builtin_log:
2437 case Builtin::BI__builtin_logf:
2438 case Builtin::BI__builtin_logf16:
2439 case Builtin::BI__builtin_logl:
2440 case Builtin::BI__builtin_logf128:
2441 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2442 Intrinsic::log,
2443 Intrinsic::experimental_constrained_log));
2444
2445 case Builtin::BIlog10:
2446 case Builtin::BIlog10f:
2447 case Builtin::BIlog10l:
2448 case Builtin::BI__builtin_log10:
2449 case Builtin::BI__builtin_log10f:
2450 case Builtin::BI__builtin_log10f16:
2451 case Builtin::BI__builtin_log10l:
2452 case Builtin::BI__builtin_log10f128:
2453 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2454 Intrinsic::log10,
2455 Intrinsic::experimental_constrained_log10));
2456
2457 case Builtin::BIlog2:
2458 case Builtin::BIlog2f:
2459 case Builtin::BIlog2l:
2460 case Builtin::BI__builtin_log2:
2461 case Builtin::BI__builtin_log2f:
2462 case Builtin::BI__builtin_log2f16:
2463 case Builtin::BI__builtin_log2l:
2464 case Builtin::BI__builtin_log2f128:
2465 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2466 Intrinsic::log2,
2467 Intrinsic::experimental_constrained_log2));
2468
2469 case Builtin::BInearbyint:
2470 case Builtin::BInearbyintf:
2471 case Builtin::BInearbyintl:
2472 case Builtin::BI__builtin_nearbyint:
2473 case Builtin::BI__builtin_nearbyintf:
2474 case Builtin::BI__builtin_nearbyintl:
2475 case Builtin::BI__builtin_nearbyintf128:
2476 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2477 Intrinsic::nearbyint,
2478 Intrinsic::experimental_constrained_nearbyint));
2479
2480 case Builtin::BIpow:
2481 case Builtin::BIpowf:
2482 case Builtin::BIpowl:
2483 case Builtin::BI__builtin_pow:
2484 case Builtin::BI__builtin_powf:
2485 case Builtin::BI__builtin_powf16:
2486 case Builtin::BI__builtin_powl:
2487 case Builtin::BI__builtin_powf128:
2488 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
2489 Intrinsic::pow,
2490 Intrinsic::experimental_constrained_pow));
2491
2492 case Builtin::BIrint:
2493 case Builtin::BIrintf:
2494 case Builtin::BIrintl:
2495 case Builtin::BI__builtin_rint:
2496 case Builtin::BI__builtin_rintf:
2497 case Builtin::BI__builtin_rintf16:
2498 case Builtin::BI__builtin_rintl:
2499 case Builtin::BI__builtin_rintf128:
2500 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2501 Intrinsic::rint,
2502 Intrinsic::experimental_constrained_rint));
2503
2504 case Builtin::BIround:
2505 case Builtin::BIroundf:
2506 case Builtin::BIroundl:
2507 case Builtin::BI__builtin_round:
2508 case Builtin::BI__builtin_roundf:
2509 case Builtin::BI__builtin_roundf16:
2510 case Builtin::BI__builtin_roundl:
2511 case Builtin::BI__builtin_roundf128:
2512 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2513 Intrinsic::round,
2514 Intrinsic::experimental_constrained_round));
2515
2516 case Builtin::BIroundeven:
2517 case Builtin::BIroundevenf:
2518 case Builtin::BIroundevenl:
2519 case Builtin::BI__builtin_roundeven:
2520 case Builtin::BI__builtin_roundevenf:
2521 case Builtin::BI__builtin_roundevenf16:
2522 case Builtin::BI__builtin_roundevenl:
2523 case Builtin::BI__builtin_roundevenf128:
2524 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2525 Intrinsic::roundeven,
2526 Intrinsic::experimental_constrained_roundeven));
2527
2528 case Builtin::BIsin:
2529 case Builtin::BIsinf:
2530 case Builtin::BIsinl:
2531 case Builtin::BI__builtin_sin:
2532 case Builtin::BI__builtin_sinf:
2533 case Builtin::BI__builtin_sinf16:
2534 case Builtin::BI__builtin_sinl:
2535 case Builtin::BI__builtin_sinf128:
2536 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2537 Intrinsic::sin,
2538 Intrinsic::experimental_constrained_sin));
2539
2540 case Builtin::BIsqrt:
2541 case Builtin::BIsqrtf:
2542 case Builtin::BIsqrtl:
2543 case Builtin::BI__builtin_sqrt:
2544 case Builtin::BI__builtin_sqrtf:
2545 case Builtin::BI__builtin_sqrtf16:
2546 case Builtin::BI__builtin_sqrtl:
2547 case Builtin::BI__builtin_sqrtf128: {
2548 Value *Call = emitUnaryMaybeConstrainedFPBuiltin(
2549 *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt);
2550 SetSqrtFPAccuracy(Call);
2551 return RValue::get(Call);
2552 }
2553 case Builtin::BItrunc:
2554 case Builtin::BItruncf:
2555 case Builtin::BItruncl:
2556 case Builtin::BI__builtin_trunc:
2557 case Builtin::BI__builtin_truncf:
2558 case Builtin::BI__builtin_truncf16:
2559 case Builtin::BI__builtin_truncl:
2560 case Builtin::BI__builtin_truncf128:
2561 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2562 Intrinsic::trunc,
2563 Intrinsic::experimental_constrained_trunc));
2564
2565 case Builtin::BIlround:
2566 case Builtin::BIlroundf:
2567 case Builtin::BIlroundl:
2568 case Builtin::BI__builtin_lround:
2569 case Builtin::BI__builtin_lroundf:
2570 case Builtin::BI__builtin_lroundl:
2571 case Builtin::BI__builtin_lroundf128:
2572 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
2573 *this, E, Intrinsic::lround,
2574 Intrinsic::experimental_constrained_lround));
2575
2576 case Builtin::BIllround:
2577 case Builtin::BIllroundf:
2578 case Builtin::BIllroundl:
2579 case Builtin::BI__builtin_llround:
2580 case Builtin::BI__builtin_llroundf:
2581 case Builtin::BI__builtin_llroundl:
2582 case Builtin::BI__builtin_llroundf128:
2583 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
2584 *this, E, Intrinsic::llround,
2585 Intrinsic::experimental_constrained_llround));
2586
2587 case Builtin::BIlrint:
2588 case Builtin::BIlrintf:
2589 case Builtin::BIlrintl:
2590 case Builtin::BI__builtin_lrint:
2591 case Builtin::BI__builtin_lrintf:
2592 case Builtin::BI__builtin_lrintl:
2593 case Builtin::BI__builtin_lrintf128:
2594 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
2595 *this, E, Intrinsic::lrint,
2596 Intrinsic::experimental_constrained_lrint));
2597
2598 case Builtin::BIllrint:
2599 case Builtin::BIllrintf:
2600 case Builtin::BIllrintl:
2601 case Builtin::BI__builtin_llrint:
2602 case Builtin::BI__builtin_llrintf:
2603 case Builtin::BI__builtin_llrintl:
2604 case Builtin::BI__builtin_llrintf128:
2605 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
2606 *this, E, Intrinsic::llrint,
2607 Intrinsic::experimental_constrained_llrint));
2608 case Builtin::BI__builtin_ldexp:
2609 case Builtin::BI__builtin_ldexpf:
2610 case Builtin::BI__builtin_ldexpl:
2611 case Builtin::BI__builtin_ldexpf16:
2612 case Builtin::BI__builtin_ldexpf128: {
2613 return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin(
2614 *this, E, Intrinsic::ldexp,
2615 Intrinsic::experimental_constrained_ldexp));
2616 }
2617 default:
2618 break;
2619 }
2620 }
2621
2622 switch (BuiltinIDIfNoAsmLabel) {
2623 default: break;
2624 case Builtin::BI__builtin___CFStringMakeConstantString:
2625 case Builtin::BI__builtin___NSStringMakeConstantString:
2626 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
2627 case Builtin::BI__builtin_stdarg_start:
2628 case Builtin::BI__builtin_va_start:
2629 case Builtin::BI__va_start:
2630 case Builtin::BI__builtin_va_end:
2631 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
2632 ? EmitScalarExpr(E->getArg(0))
2633 : EmitVAListRef(E->getArg(0)).getPointer(),
2634 BuiltinID != Builtin::BI__builtin_va_end);
2635 return RValue::get(nullptr);
2636 case Builtin::BI__builtin_va_copy: {
2637 Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
2638 Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
2639
2640 llvm::Type *Type = Int8PtrTy;
2641
2642 DstPtr = Builder.CreateBitCast(DstPtr, Type);
2643 SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
2644 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), {DstPtr, SrcPtr});
2645 return RValue::get(nullptr);
2646 }
2647 case Builtin::BI__builtin_abs:
2648 case Builtin::BI__builtin_labs:
2649 case Builtin::BI__builtin_llabs: {
2650 // X < 0 ? -X : X
2651 // The negation has 'nsw' because abs of INT_MIN is undefined.
2652 Value *ArgValue = EmitScalarExpr(E->getArg(0));
2653 Value *NegOp = Builder.CreateNSWNeg(ArgValue, "neg");
2654 Constant *Zero = llvm::Constant::getNullValue(ArgValue->getType());
2655 Value *CmpResult = Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
2656 Value *Result = Builder.CreateSelect(CmpResult, NegOp, ArgValue, "abs");
2657 return RValue::get(Result);
2658 }
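// Example: __builtin_abs(-5) selects the negated operand and yields 5; the
// 'nsw' flag makes __builtin_abs(INT_MIN) undefined rather than wrapping.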
2659 case Builtin::BI__builtin_complex: {
2660 Value *Real = EmitScalarExpr(E->getArg(0));
2661 Value *Imag = EmitScalarExpr(E->getArg(1));
2662 return RValue::getComplex({Real, Imag});
2663 }
2664 case Builtin::BI__builtin_conj:
2665 case Builtin::BI__builtin_conjf:
2666 case Builtin::BI__builtin_conjl:
2667 case Builtin::BIconj:
2668 case Builtin::BIconjf:
2669 case Builtin::BIconjl: {
2670 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
2671 Value *Real = ComplexVal.first;
2672 Value *Imag = ComplexVal.second;
2673 Imag = Builder.CreateFNeg(Imag, "neg");
2674 return RValue::getComplex(std::make_pair(Real, Imag));
2675 }
2676 case Builtin::BI__builtin_creal:
2677 case Builtin::BI__builtin_crealf:
2678 case Builtin::BI__builtin_creall:
2679 case Builtin::BIcreal:
2680 case Builtin::BIcrealf:
2681 case Builtin::BIcreall: {
2682 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
2683 return RValue::get(ComplexVal.first);
2684 }
2685
2686 case Builtin::BI__builtin_preserve_access_index: {
2687 // Only enable the preserved access index region when debuginfo
2688 // is available, as debuginfo is needed to preserve the user-level
2689 // access pattern.
2690 if (!getDebugInfo()) {
2691 CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");
2692 return RValue::get(EmitScalarExpr(E->getArg(0)));
2693 }
2694
2695 // Nested builtin_preserve_access_index() not supported
2696 if (IsInPreservedAIRegion) {
2697 CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");
2698 return RValue::get(EmitScalarExpr(E->getArg(0)));
2699 }
2700
2701 IsInPreservedAIRegion = true;
2702 Value *Res = EmitScalarExpr(E->getArg(0));
2703 IsInPreservedAIRegion = false;
2704 return RValue::get(Res);
2705 }
2706
2707 case Builtin::BI__builtin_cimag:
2708 case Builtin::BI__builtin_cimagf:
2709 case Builtin::BI__builtin_cimagl:
2710 case Builtin::BIcimag:
2711 case Builtin::BIcimagf:
2712 case Builtin::BIcimagl: {
2713 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
2714 return RValue::get(ComplexVal.second);
2715 }
2716
2717 case Builtin::BI__builtin_clrsb:
2718 case Builtin::BI__builtin_clrsbl:
2719 case Builtin::BI__builtin_clrsbll: {
2720 // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
2721 Value *ArgValue = EmitScalarExpr(E->getArg(0));
2722
2723 llvm::Type *ArgType = ArgValue->getType();
2724 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
2725
2726 llvm::Type *ResultType = ConvertType(E->getType());
2727 Value *Zero = llvm::Constant::getNullValue(ArgType);
2728 Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
2729 Value *Inverse = Builder.CreateNot(ArgValue, "not");
2730 Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
2731 Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
2732 Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
2733 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
2734 "cast");
2735 return RValue::get(Result);
2736 }
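// Example: for 32-bit int, __builtin_clrsb(0) and __builtin_clrsb(-1) both
// yield 31, since every bit below the sign bit is redundant.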
2737 case Builtin::BI__builtin_ctzs:
2738 case Builtin::BI__builtin_ctz:
2739 case Builtin::BI__builtin_ctzl:
2740 case Builtin::BI__builtin_ctzll: {
2741 Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
2742
2743 llvm::Type *ArgType = ArgValue->getType();
2744 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
2745
2746 llvm::Type *ResultType = ConvertType(E->getType());
2747 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
2748 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
2749 if (Result->getType() != ResultType)
2750 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
2751 "cast");
2752 return RValue::get(Result);
2753 }
2754 case Builtin::BI__builtin_clzs:
2755 case Builtin::BI__builtin_clz:
2756 case Builtin::BI__builtin_clzl:
2757 case Builtin::BI__builtin_clzll: {
2758 Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
2759
2760 llvm::Type *ArgType = ArgValue->getType();
2761 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
2762
2763 llvm::Type *ResultType = ConvertType(E->getType());
2764 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
2765 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
2766 if (Result->getType() != ResultType)
2767 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
2768 "cast");
2769 return RValue::get(Result);
2770 }
2771 case Builtin::BI__builtin_ffs:
2772 case Builtin::BI__builtin_ffsl:
2773 case Builtin::BI__builtin_ffsll: {
2774 // ffs(x) -> x ? cttz(x) + 1 : 0
2775 Value *ArgValue = EmitScalarExpr(E->getArg(0));
2776
2777 llvm::Type *ArgType = ArgValue->getType();
2778 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
2779
2780 llvm::Type *ResultType = ConvertType(E->getType());
2781 Value *Tmp =
2782 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
2783 llvm::ConstantInt::get(ArgType, 1));
2784 Value *Zero = llvm::Constant::getNullValue(ArgType);
2785 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
2786 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
2787 if (Result->getType() != ResultType)
2788 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
2789 "cast");
2790 return RValue::get(Result);
2791 }
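// Example: __builtin_ffs(8) yields 4 (the 1-based index of the lowest set
// bit) and __builtin_ffs(0) yields 0 via the select above.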
2792 case Builtin::BI__builtin_parity:
2793 case Builtin::BI__builtin_parityl:
2794 case Builtin::BI__builtin_parityll: {
2795 // parity(x) -> ctpop(x) & 1
2796 Value *ArgValue = EmitScalarExpr(E->getArg(0));
2797
2798 llvm::Type *ArgType = ArgValue->getType();
2799 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
2800
2801 llvm::Type *ResultType = ConvertType(E->getType());
2802 Value *Tmp = Builder.CreateCall(F, ArgValue);
2803 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
2804 if (Result->getType() != ResultType)
2805 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
2806 "cast");
2807 return RValue::get(Result);
2808 }
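// Example: __builtin_parity(7) computes ctpop(7) & 1 == 3 & 1 == 1.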
2809 case Builtin::BI__lzcnt16:
2810 case Builtin::BI__lzcnt:
2811 case Builtin::BI__lzcnt64: {
2812 Value *ArgValue = EmitScalarExpr(E->getArg(0));
2813
2814 llvm::Type *ArgType = ArgValue->getType();
2815 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
2816
2817 llvm::Type *ResultType = ConvertType(E->getType());
2818 Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
2819 if (Result->getType() != ResultType)
2820 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
2821 "cast");
2822 return RValue::get(Result);
2823 }
2824 case Builtin::BI__popcnt16:
2825 case Builtin::BI__popcnt:
2826 case Builtin::BI__popcnt64:
2827 case Builtin::BI__builtin_popcount:
2828 case Builtin::BI__builtin_popcountl:
2829 case Builtin::BI__builtin_popcountll: {
2830 Value *ArgValue = EmitScalarExpr(E->getArg(0));
2831
2832 llvm::Type *ArgType = ArgValue->getType();
2833 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
2834
2835 llvm::Type *ResultType = ConvertType(E->getType());
2836 Value *Result = Builder.CreateCall(F, ArgValue);
2837 if (Result->getType() != ResultType)
2838 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
2839 "cast");
2840 return RValue::get(Result);
2841 }
2842 case Builtin::BI__builtin_unpredictable: {
2843 // Always return the argument of __builtin_unpredictable. LLVM does not
2844 // handle this builtin. Metadata for this builtin should be added directly
2845 // to instructions such as branches or switches that use it.
2846 return RValue::get(EmitScalarExpr(E->getArg(0)));
2847 }
2848 case Builtin::BI__builtin_expect: {
2849 Value *ArgValue = EmitScalarExpr(E->getArg(0));
2850 llvm::Type *ArgType = ArgValue->getType();
2851
2852 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
2853 // Don't generate llvm.expect on -O0 as the backend won't use it for
2854 // anything.
2855 // Note, we still IRGen ExpectedValue because it could have side-effects.
2856 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
2857 return RValue::get(ArgValue);
2858
2859 Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
2860 Value *Result =
2861 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
2862 return RValue::get(Result);
2863 }
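// Example: `if (__builtin_expect(err, 0))` emits llvm.expect so the branch
// is optimized for the err == 0 path; at -O0 the call collapses to just err.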
2864 case Builtin::BI__builtin_expect_with_probability: {
2865 Value *ArgValue = EmitScalarExpr(E->getArg(0));
2866 llvm::Type *ArgType = ArgValue->getType();
2867
2868 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
2869 llvm::APFloat Probability(0.0);
2870 const Expr *ProbArg = E->getArg(2);
2871 bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext());
2872 assert(EvalSucceed && "probability should be able to evaluate as float");
2873 (void)EvalSucceed;
2874 bool LoseInfo = false;
2875 Probability.convert(llvm::APFloat::IEEEdouble(),
2876 llvm::RoundingMode::Dynamic, &LoseInfo);
2877 llvm::Type *Ty = ConvertType(ProbArg->getType());
2878 Constant *Confidence = ConstantFP::get(Ty, Probability);
2879 // Don't generate llvm.expect.with.probability on -O0 as the backend
2880 // won't use it for anything.
2881 // Note, we still IRGen ExpectedValue because it could have side-effects.
2882 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
2883 return RValue::get(ArgValue);
2884
2885 Function *FnExpect =
2886 CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);
2887 Value *Result = Builder.CreateCall(
2888 FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval");
2889 return RValue::get(Result);
2890 }
2891 case Builtin::BI__builtin_assume_aligned: {
2892 const Expr *Ptr = E->getArg(0);
2893 Value *PtrValue = EmitScalarExpr(Ptr);
2894 Value *OffsetValue =
2895 (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
2896
2897 Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
2898 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
2899 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
2900 AlignmentCI = ConstantInt::get(AlignmentCI->getType(),
2901 llvm::Value::MaximumAlignment);
2902
2903 emitAlignmentAssumption(PtrValue, Ptr,
2904 /*The expr loc is sufficient.*/ SourceLocation(),
2905 AlignmentCI, OffsetValue);
2906 return RValue::get(PtrValue);
2907 }
2908 case Builtin::BI__assume:
2909 case Builtin::BI__builtin_assume: {
2910 if (E->getArg(0)->HasSideEffects(getContext()))
2911 return RValue::get(nullptr);
2912
2913 Value *ArgValue = EmitScalarExpr(E->getArg(0));
2914 Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
2915 Builder.CreateCall(FnAssume, ArgValue);
2916 return RValue::get(nullptr);
2917 }
2918 case Builtin::BI__builtin_assume_separate_storage: {
2919 const Expr *Arg0 = E->getArg(0);
2920 const Expr *Arg1 = E->getArg(1);
2921
2922 Value *Value0 = EmitScalarExpr(Arg0);
2923 Value *Value1 = EmitScalarExpr(Arg1);
2924
2925 Value *Values[] = {Value0, Value1};
2926 OperandBundleDefT<Value *> OBD("separate_storage", Values);
2927 Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD});
2928 return RValue::get(nullptr);
2929 }
2930 case Builtin::BI__arithmetic_fence: {
2931 // Create the builtin call if FastMath is selected and the target
2932 // supports the builtin; otherwise just return the argument.
2933 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2934 llvm::FastMathFlags FMF = Builder.getFastMathFlags();
2935 bool isArithmeticFenceEnabled =
2936 FMF.allowReassoc() &&
2937 getContext().getTargetInfo().checkArithmeticFenceSupported();
2938 QualType ArgType = E->getArg(0)->getType();
2939 if (ArgType->isComplexType()) {
2940 if (isArithmeticFenceEnabled) {
2941 QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();
2942 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
2943 Value *Real = Builder.CreateArithmeticFence(ComplexVal.first,
2944 ConvertType(ElementType));
2945 Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second,
2946 ConvertType(ElementType));
2947 return RValue::getComplex(std::make_pair(Real, Imag));
2948 }
2949 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
2950 Value *Real = ComplexVal.first;
2951 Value *Imag = ComplexVal.second;
2952 return RValue::getComplex(std::make_pair(Real, Imag));
2953 }
2954 Value *ArgValue = EmitScalarExpr(E->getArg(0));
2955 if (isArithmeticFenceEnabled)
2956 return RValue::get(
2957 Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType)));
2958 return RValue::get(ArgValue);
2959 }
2960 case Builtin::BI__builtin_bswap16:
2961 case Builtin::BI__builtin_bswap32:
2962 case Builtin::BI__builtin_bswap64:
2963 case Builtin::BI_byteswap_ushort:
2964 case Builtin::BI_byteswap_ulong:
2965 case Builtin::BI_byteswap_uint64: {
2966 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
2967 }
2968 case Builtin::BI__builtin_bitreverse8:
2969 case Builtin::BI__builtin_bitreverse16:
2970 case Builtin::BI__builtin_bitreverse32:
2971 case Builtin::BI__builtin_bitreverse64: {
2972 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
2973 }
2974 case Builtin::BI__builtin_rotateleft8:
2975 case Builtin::BI__builtin_rotateleft16:
2976 case Builtin::BI__builtin_rotateleft32:
2977 case Builtin::BI__builtin_rotateleft64:
2978 case Builtin::BI_rotl8: // Microsoft variants of rotate left
2979 case Builtin::BI_rotl16:
2980 case Builtin::BI_rotl:
2981 case Builtin::BI_lrotl:
2982 case Builtin::BI_rotl64:
2983 return emitRotate(E, false);
2984
2985 case Builtin::BI__builtin_rotateright8:
2986 case Builtin::BI__builtin_rotateright16:
2987 case Builtin::BI__builtin_rotateright32:
2988 case Builtin::BI__builtin_rotateright64:
2989 case Builtin::BI_rotr8: // Microsoft variants of rotate right
2990 case Builtin::BI_rotr16:
2991 case Builtin::BI_rotr:
2992 case Builtin::BI_lrotr:
2993 case Builtin::BI_rotr64:
2994 return emitRotate(E, true);
2995
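// For illustration: emitRotate maps rotates onto the funnel-shift intrinsics,
// so __builtin_rotateleft32(x, s) becomes roughly
//   %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %s)
// (rotate-right uses @llvm.fshr with the same duplicated value operand).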
2996 case Builtin::BI__builtin_constant_p: {
2997 llvm::Type *ResultType = ConvertType(E->getType());
2998
2999 const Expr *Arg = E->getArg(0);
3000 QualType ArgType = Arg->getType();
3001 // FIXME: The allowance for Obj-C pointers and block pointers is historical
3002 // and likely a mistake.
3003 if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
3004 !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
3005 // Per the GCC documentation, only numeric constants are recognized after
3006 // inlining.
3007 return RValue::get(ConstantInt::get(ResultType, 0));
3008
3009 if (Arg->HasSideEffects(getContext()))
3010 // The argument is unevaluated, so be conservative if it might have
3011 // side-effects.
3012 return RValue::get(ConstantInt::get(ResultType, 0));
3013
3014 Value *ArgValue = EmitScalarExpr(Arg);
3015 if (ArgType->isObjCObjectPointerType()) {
3016 // Convert Objective-C objects to id because we cannot distinguish between
3017 // LLVM types for Obj-C classes as they are opaque.
3018 ArgType = CGM.getContext().getObjCIdType();
3019 ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
3020 }
3021 Function *F =
3022 CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
3023 Value *Result = Builder.CreateCall(F, ArgValue);
3024 if (Result->getType() != ResultType)
3025 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
3026 return RValue::get(Result);
3027 }
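// For illustration: __builtin_constant_p(x) on an int lowers to
//   %r = call i1 @llvm.is.constant.i32(i32 %x)
// which the optimizer folds to 1 if the value becomes a constant after
// inlining and optimization, and to 0 otherwise; it never survives to final IR.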
3028 case Builtin::BI__builtin_dynamic_object_size:
3029 case Builtin::BI__builtin_object_size: {
3030 unsigned Type =
3031 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
3032 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
3033
3034 // We pass this builtin onto the optimizer so that it can figure out the
3035 // object size in more complex cases.
3036 bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
3037 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
3038 /*EmittedE=*/nullptr, IsDynamic));
3039 }
3040 case Builtin::BI__builtin_prefetch: {
3041 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
3042 // FIXME: Technically these constants should be of type 'int', yes?
3043 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
3044 llvm::ConstantInt::get(Int32Ty, 0);
3045 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
3046 llvm::ConstantInt::get(Int32Ty, 3);
3047 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
3048 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
3049 Builder.CreateCall(F, {Address, RW, Locality, Data});
3050 return RValue::get(nullptr);
3051 }
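// For illustration: a bare __builtin_prefetch(p) defaults to a read prefetch
// (rw = 0) with maximal temporal locality (3), i.e. roughly
//   call void @llvm.prefetch.p0(ptr %p, i32 0, i32 3, i32 1)
// where the trailing 1 selects the data (not instruction) cache.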
3052 case Builtin::BI__builtin_readcyclecounter: {
3053 Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
3054 return RValue::get(Builder.CreateCall(F));
3055 }
3056 case Builtin::BI__builtin___clear_cache: {
3057 Value *Begin = EmitScalarExpr(E->getArg(0));
3058 Value *End = EmitScalarExpr(E->getArg(1));
3059 Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
3060 return RValue::get(Builder.CreateCall(F, {Begin, End}));
3061 }
3062 case Builtin::BI__builtin_trap:
3063 EmitTrapCall(Intrinsic::trap);
3064 return RValue::get(nullptr);
3065 case Builtin::BI__debugbreak:
3066 EmitTrapCall(Intrinsic::debugtrap);
3067 return RValue::get(nullptr);
3068 case Builtin::BI__builtin_unreachable: {
3069 EmitUnreachable(E->getExprLoc());
3070
3071 // We do need to preserve an insertion point.
3072 EmitBlock(createBasicBlock("unreachable.cont"));
3073
3074 return RValue::get(nullptr);
3075 }
3076
3077 case Builtin::BI__builtin_powi:
3078 case Builtin::BI__builtin_powif:
3079 case Builtin::BI__builtin_powil: {
3080 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
3081 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
3082
3083 if (Builder.getIsFPConstrained()) {
3084 // FIXME: llvm.powi has 2 mangling types,
3085 // llvm.experimental.constrained.powi has one.
3086 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3087 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
3088 Src0->getType());
3089 return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
3090 }
3091
3092 Function *F = CGM.getIntrinsic(Intrinsic::powi,
3093 { Src0->getType(), Src1->getType() });
3094 return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
3095 }
3096 case Builtin::BI__builtin_frexp:
3097 case Builtin::BI__builtin_frexpf:
3098 case Builtin::BI__builtin_frexpl:
3099 case Builtin::BI__builtin_frexpf128:
3100 case Builtin::BI__builtin_frexpf16:
3101 return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp));
3102 case Builtin::BI__builtin_isgreater:
3103 case Builtin::BI__builtin_isgreaterequal:
3104 case Builtin::BI__builtin_isless:
3105 case Builtin::BI__builtin_islessequal:
3106 case Builtin::BI__builtin_islessgreater:
3107 case Builtin::BI__builtin_isunordered: {
3108 // Ordered comparisons: we know the arguments to these are matching scalar
3109 // floating point values.
3110 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3111 Value *LHS = EmitScalarExpr(E->getArg(0));
3112 Value *RHS = EmitScalarExpr(E->getArg(1));
3113
3114 switch (BuiltinID) {
3115 default: llvm_unreachable("Unknown ordered comparison");
3116 case Builtin::BI__builtin_isgreater:
3117 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
3118 break;
3119 case Builtin::BI__builtin_isgreaterequal:
3120 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
3121 break;
3122 case Builtin::BI__builtin_isless:
3123 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
3124 break;
3125 case Builtin::BI__builtin_islessequal:
3126 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
3127 break;
3128 case Builtin::BI__builtin_islessgreater:
3129 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
3130 break;
3131 case Builtin::BI__builtin_isunordered:
3132 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
3133 break;
3134 }
3135 // ZExt bool to int type.
3136 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
3137 }
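// For illustration: __builtin_isless(a, b) becomes an ordered compare plus a
// zero-extension to the int result type, roughly
//   %cmp = fcmp olt double %a, %b
//   %r = zext i1 %cmp to i32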
3138
3139 case Builtin::BI__builtin_isnan: {
3140 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3141 Value *V = EmitScalarExpr(E->getArg(0));
3142 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3143 return RValue::get(Result);
3144 return RValue::get(
3145 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNan),
3146 ConvertType(E->getType())));
3147 }
3148
3149 case Builtin::BI__builtin_isinf: {
3150 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3151 Value *V = EmitScalarExpr(E->getArg(0));
3152 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3153 return RValue::get(Result);
3154 return RValue::get(
3155 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcInf),
3156 ConvertType(E->getType())));
3157 }
3158
3159 case Builtin::BIfinite:
3160 case Builtin::BI__finite:
3161 case Builtin::BIfinitef:
3162 case Builtin::BI__finitef:
3163 case Builtin::BIfinitel:
3164 case Builtin::BI__finitel:
3165 case Builtin::BI__builtin_isfinite: {
3166 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3167 Value *V = EmitScalarExpr(E->getArg(0));
3168 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3169 return RValue::get(Result);
3170 return RValue::get(
3171 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcFinite),
3172 ConvertType(E->getType())));
3173 }
3174
3175 case Builtin::BI__builtin_isnormal: {
3176 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3177 Value *V = EmitScalarExpr(E->getArg(0));
3178 return RValue::get(
3179 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNormal),
3180 ConvertType(E->getType())));
3181 }
3182
3183 case Builtin::BI__builtin_isfpclass: {
3184 Expr::EvalResult Result;
3185 if (!E->getArg(1)->EvaluateAsInt(Result, CGM.getContext()))
3186 break;
3187 uint64_t Test = Result.Val.getInt().getLimitedValue();
3188 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3189 Value *V = EmitScalarExpr(E->getArg(0));
3190 return RValue::get(Builder.CreateZExt(Builder.createIsFPClass(V, Test),
3191 ConvertType(E->getType())));
3192 }
3193
3194 case Builtin::BI__builtin_nondeterministic_value: {
3195 llvm::Type *Ty = ConvertType(E->getArg(0)->getType());
3196
3197 Value *Result = PoisonValue::get(Ty);
3198 Result = Builder.CreateFreeze(Result);
3199
3200 return RValue::get(Result);
3201 }
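// For illustration: __builtin_nondeterministic_value(x) ignores the value of
// x and simply yields a frozen poison of the same type:
//   %r = freeze i32 poison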
3202
3203 case Builtin::BI__builtin_elementwise_abs: {
3204 Value *Result;
3205 QualType QT = E->getArg(0)->getType();
3206
3207 if (auto *VecTy = QT->getAs<VectorType>())
3208 QT = VecTy->getElementType();
3209 if (QT->isIntegerType())
3210 Result = Builder.CreateBinaryIntrinsic(
3211 llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),
3212 Builder.getFalse(), nullptr, "elt.abs");
3213 else
3214 Result = emitUnaryBuiltin(*this, E, llvm::Intrinsic::fabs, "elt.abs");
3215
3216 return RValue::get(Result);
3217 }
3218
3219 case Builtin::BI__builtin_elementwise_ceil:
3220 return RValue::get(
3221 emitUnaryBuiltin(*this, E, llvm::Intrinsic::ceil, "elt.ceil"));
3222 case Builtin::BI__builtin_elementwise_exp:
3223 return RValue::get(
3224 emitUnaryBuiltin(*this, E, llvm::Intrinsic::exp, "elt.exp"));
3225 case Builtin::BI__builtin_elementwise_exp2:
3226 return RValue::get(
3227 emitUnaryBuiltin(*this, E, llvm::Intrinsic::exp2, "elt.exp2"));
3228 case Builtin::BI__builtin_elementwise_log:
3229 return RValue::get(
3230 emitUnaryBuiltin(*this, E, llvm::Intrinsic::log, "elt.log"));
3231 case Builtin::BI__builtin_elementwise_log2:
3232 return RValue::get(
3233 emitUnaryBuiltin(*this, E, llvm::Intrinsic::log2, "elt.log2"));
3234 case Builtin::BI__builtin_elementwise_log10:
3235 return RValue::get(
3236 emitUnaryBuiltin(*this, E, llvm::Intrinsic::log10, "elt.log10"));
3237 case Builtin::BI__builtin_elementwise_pow: {
3238 return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::pow));
3239 }
3240 case Builtin::BI__builtin_elementwise_cos:
3241 return RValue::get(
3242 emitUnaryBuiltin(*this, E, llvm::Intrinsic::cos, "elt.cos"));
3243 case Builtin::BI__builtin_elementwise_floor:
3244 return RValue::get(
3245 emitUnaryBuiltin(*this, E, llvm::Intrinsic::floor, "elt.floor"));
3246 case Builtin::BI__builtin_elementwise_roundeven:
3247 return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::roundeven,
3248 "elt.roundeven"));
3249 case Builtin::BI__builtin_elementwise_round:
3250 return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::round,
3251 "elt.round"));
3252 case Builtin::BI__builtin_elementwise_rint:
3253 return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::rint,
3254 "elt.rint"));
3255 case Builtin::BI__builtin_elementwise_nearbyint:
3256 return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::nearbyint,
3257 "elt.nearbyint"));
3258 case Builtin::BI__builtin_elementwise_sin:
3259 return RValue::get(
3260 emitUnaryBuiltin(*this, E, llvm::Intrinsic::sin, "elt.sin"));
3261
3262 case Builtin::BI__builtin_elementwise_trunc:
3263 return RValue::get(
3264 emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc"));
3265 case Builtin::BI__builtin_elementwise_canonicalize:
3266 return RValue::get(
3267 emitUnaryBuiltin(*this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize"));
3268 case Builtin::BI__builtin_elementwise_copysign:
3269 return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::copysign));
3270 case Builtin::BI__builtin_elementwise_fma:
3271 return RValue::get(emitTernaryBuiltin(*this, E, llvm::Intrinsic::fma));
3272 case Builtin::BI__builtin_elementwise_add_sat:
3273 case Builtin::BI__builtin_elementwise_sub_sat: {
3274 Value *Op0 = EmitScalarExpr(E->getArg(0));
3275 Value *Op1 = EmitScalarExpr(E->getArg(1));
3276 Value *Result;
3277 assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");
3278 QualType Ty = E->getArg(0)->getType();
3279 if (auto *VecTy = Ty->getAs<VectorType>())
3280 Ty = VecTy->getElementType();
3281 bool IsSigned = Ty->isSignedIntegerType();
3282 unsigned Opc;
3283 if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
3284 Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
3285 else
3286 Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
3287 Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat");
3288 return RValue::get(Result);
3289 }
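// For illustration: the saturating forms pick the intrinsic by element
// signedness, so on a 16-byte vector of unsigned char,
//   __builtin_elementwise_add_sat(a, b)
// lowers roughly to
//   %r = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)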
3290
3291 case Builtin::BI__builtin_elementwise_max: {
3292 Value *Op0 = EmitScalarExpr(E->getArg(0));
3293 Value *Op1 = EmitScalarExpr(E->getArg(1));
3294 Value *Result;
3295 if (Op0->getType()->isIntOrIntVectorTy()) {
3296 QualType Ty = E->getArg(0)->getType();
3297 if (auto *VecTy = Ty->getAs<VectorType>())
3298 Ty = VecTy->getElementType();
3299 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
3300 ? llvm::Intrinsic::smax
3301 : llvm::Intrinsic::umax,
3302 Op0, Op1, nullptr, "elt.max");
3303 } else
3304 Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
3305 return RValue::get(Result);
3306 }
3307 case Builtin::BI__builtin_elementwise_min: {
3308 Value *Op0 = EmitScalarExpr(E->getArg(0));
3309 Value *Op1 = EmitScalarExpr(E->getArg(1));
3310 Value *Result;
3311 if (Op0->getType()->isIntOrIntVectorTy()) {
3312 QualType Ty = E->getArg(0)->getType();
3313 if (auto *VecTy = Ty->getAs<VectorType>())
3314 Ty = VecTy->getElementType();
3315 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
3316 ? llvm::Intrinsic::smin
3317 : llvm::Intrinsic::umin,
3318 Op0, Op1, nullptr, "elt.min");
3319 } else
3320 Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
3321 return RValue::get(Result);
3322 }
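// For illustration: min/max follow the same pattern; integer element types
// select llvm.smin/llvm.umin (or smax/umax), while floating-point element
// types use llvm.minnum/llvm.maxnum, e.g.
//   %r = call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b)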
3323
3324 case Builtin::BI__builtin_reduce_max: {
3325 auto GetIntrinsicID = [](QualType QT) {
3326 if (auto *VecTy = QT->getAs<VectorType>())
3327 QT = VecTy->getElementType();
3328 if (QT->isSignedIntegerType())
3329 return llvm::Intrinsic::vector_reduce_smax;
3330 if (QT->isUnsignedIntegerType())
3331 return llvm::Intrinsic::vector_reduce_umax;
3332 assert(QT->isFloatingType() && "must have a float here");
3333 return llvm::Intrinsic::vector_reduce_fmax;
3334 };
3335 return RValue::get(emitUnaryBuiltin(
3336 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.max"));
3337 }
3338
3339 case Builtin::BI__builtin_reduce_min: {
3340 auto GetIntrinsicID = [](QualType QT) {
3341 if (auto *VecTy = QT->getAs<VectorType>())
3342 QT = VecTy->getElementType();
3343 if (QT->isSignedIntegerType())
3344 return llvm::Intrinsic::vector_reduce_smin;
3345 if (QT->isUnsignedIntegerType())
3346 return llvm::Intrinsic::vector_reduce_umin;
3347 assert(QT->isFloatingType() && "must have a float here");
3348 return llvm::Intrinsic::vector_reduce_fmin;
3349 };
3350
3351 return RValue::get(emitUnaryBuiltin(
3352 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
3353 }
3354
3355 case Builtin::BI__builtin_reduce_add:
3356 return RValue::get(emitUnaryBuiltin(
3357 *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
3358 case Builtin::BI__builtin_reduce_mul:
3359 return RValue::get(emitUnaryBuiltin(
3360 *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
3361 case Builtin::BI__builtin_reduce_xor:
3362 return RValue::get(emitUnaryBuiltin(
3363 *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
3364 case Builtin::BI__builtin_reduce_or:
3365 return RValue::get(emitUnaryBuiltin(
3366 *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
3367 case Builtin::BI__builtin_reduce_and:
3368 return RValue::get(emitUnaryBuiltin(
3369 *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
3370
3371 case Builtin::BI__builtin_matrix_transpose: {
3372 auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
3373 Value *MatValue = EmitScalarExpr(E->getArg(0));
3374 MatrixBuilder MB(Builder);
3375 Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
3376 MatrixTy->getNumColumns());
3377 return RValue::get(Result);
3378 }
3379
3380 case Builtin::BI__builtin_matrix_column_major_load: {
3381 MatrixBuilder MB(Builder);
3382 // Emit everything that isn't dependent on the first parameter type
3383 Value *Stride = EmitScalarExpr(E->getArg(3));
3384 const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
3385 auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();
3386 assert(PtrTy && "arg0 must be of pointer type");
3387 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
3388
3389 Address Src = EmitPointerWithAlignment(E->getArg(0));
3390 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(0)->getType(),
3391 E->getArg(0)->getExprLoc(), FD, 0);
3392 Value *Result = MB.CreateColumnMajorLoad(
3393 Src.getElementType(), Src.getPointer(),
3394 Align(Src.getAlignment().getQuantity()), Stride, IsVolatile,
3395 ResultTy->getNumRows(), ResultTy->getNumColumns(),
3396 "matrix");
3397 return RValue::get(Result);
3398 }
3399
3400 case Builtin::BI__builtin_matrix_column_major_store: {
3401 MatrixBuilder MB(Builder);
3402 Value *Matrix = EmitScalarExpr(E->getArg(0));
3403 Address Dst = EmitPointerWithAlignment(E->getArg(1));
3404 Value *Stride = EmitScalarExpr(E->getArg(2));
3405
3406 const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
3407 auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();
3408 assert(PtrTy && "arg1 must be of pointer type");
3409 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
3410
3411 EmitNonNullArgCheck(RValue::get(Dst.getPointer()), E->getArg(1)->getType(),
3412 E->getArg(1)->getExprLoc(), FD, 0);
3413 Value *Result = MB.CreateColumnMajorStore(
3414 Matrix, Dst.getPointer(), Align(Dst.getAlignment().getQuantity()),
3415 Stride, IsVolatile, MatrixTy->getNumRows(), MatrixTy->getNumColumns());
3416 return RValue::get(Result);
3417 }
3418
3419 case Builtin::BI__builtin_isinf_sign: {
3420 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
3421 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3422 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
3423 Value *Arg = EmitScalarExpr(E->getArg(0));
3424 Value *AbsArg = EmitFAbs(*this, Arg);
3425 Value *IsInf = Builder.CreateFCmpOEQ(
3426 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
3427 Value *IsNeg = EmitSignBit(*this, Arg);
3428
3429 llvm::Type *IntTy = ConvertType(E->getType());
3430 Value *Zero = Constant::getNullValue(IntTy);
3431 Value *One = ConstantInt::get(IntTy, 1);
3432 Value *NegativeOne = ConstantInt::get(IntTy, -1);
3433 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
3434 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
3435 return RValue::get(Result);
3436 }
3437
3438 case Builtin::BI__builtin_flt_rounds: {
3439 Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);
3440
3441 llvm::Type *ResultType = ConvertType(E->getType());
3442 Value *Result = Builder.CreateCall(F);
3443 if (Result->getType() != ResultType)
3444 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3445 "cast");
3446 return RValue::get(Result);
3447 }
3448
3449 case Builtin::BI__builtin_set_flt_rounds: {
3450 Function *F = CGM.getIntrinsic(Intrinsic::set_rounding);
3451
3452 Value *V = EmitScalarExpr(E->getArg(0));
3453 Builder.CreateCall(F, V);
3454 return RValue::get(nullptr);
3455 }
3456
3457 case Builtin::BI__builtin_fpclassify: {
3458 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3459 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
3460 Value *V = EmitScalarExpr(E->getArg(5));
3461 llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
3462
3463 // Create Result
3464 BasicBlock *Begin = Builder.GetInsertBlock();
3465 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
3466 Builder.SetInsertPoint(End);
3467 PHINode *Result =
3468 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
3469 "fpclassify_result");
3470
3471 // if (V==0) return FP_ZERO
3472 Builder.SetInsertPoint(Begin);
3473 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
3474 "iszero");
3475 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
3476 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
3477 Builder.CreateCondBr(IsZero, End, NotZero);
3478 Result->addIncoming(ZeroLiteral, Begin);
3479
3480 // if (V != V) return FP_NAN
3481 Builder.SetInsertPoint(NotZero);
3482 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
3483 Value *NanLiteral = EmitScalarExpr(E->getArg(0));
3484 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
3485 Builder.CreateCondBr(IsNan, End, NotNan);
3486 Result->addIncoming(NanLiteral, NotZero);
3487
3488 // if (fabs(V) == infinity) return FP_INFINITY
3489 Builder.SetInsertPoint(NotNan);
3490 Value *VAbs = EmitFAbs(*this, V);
3491 Value *IsInf =
3492 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
3493 "isinf");
3494 Value *InfLiteral = EmitScalarExpr(E->getArg(1));
3495 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
3496 Builder.CreateCondBr(IsInf, End, NotInf);
3497 Result->addIncoming(InfLiteral, NotNan);
3498
3499 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
3500 Builder.SetInsertPoint(NotInf);
3501 APFloat Smallest = APFloat::getSmallestNormalized(
3502 getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
3503 Value *IsNormal =
3504 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
3505 "isnormal");
3506 Value *NormalResult =
3507 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
3508 EmitScalarExpr(E->getArg(3)));
3509 Builder.CreateBr(End);
3510 Result->addIncoming(NormalResult, NotInf);
3511
3512 // return Result
3513 Builder.SetInsertPoint(End);
3514 return RValue::get(Result);
3515 }
3516
3517 case Builtin::BIalloca:
3518 case Builtin::BI_alloca:
3519 case Builtin::BI__builtin_alloca_uninitialized:
3520 case Builtin::BI__builtin_alloca: {
3521 Value *Size = EmitScalarExpr(E->getArg(0));
3522 const TargetInfo &TI = getContext().getTargetInfo();
3523 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
3524 const Align SuitableAlignmentInBytes =
3525 CGM.getContext()
3526 .toCharUnitsFromBits(TI.getSuitableAlign())
3527 .getAsAlign();
3528 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
3529 AI->setAlignment(SuitableAlignmentInBytes);
3530 if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
3531 initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
3532 return RValue::get(AI);
3533 }
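// For illustration: __builtin_alloca(n) therefore emits, on a typical x86-64
// target where __BIGGEST_ALIGNMENT__ is 16,
//   %a = alloca i8, i64 %n, align 16
// optionally followed by an auto-init memset when -ftrivial-auto-var-init is
// in effect.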
3534
3535 case Builtin::BI__builtin_alloca_with_align_uninitialized:
3536 case Builtin::BI__builtin_alloca_with_align: {
3537 Value *Size = EmitScalarExpr(E->getArg(0));
3538 Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
3539 auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
3540 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
3541 const Align AlignmentInBytes =
3542 CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();
3543 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
3544 AI->setAlignment(AlignmentInBytes);
3545 if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
3546 initializeAlloca(*this, AI, Size, AlignmentInBytes);
3547 return RValue::get(AI);
3548 }
3549
3550 case Builtin::BIbzero:
3551 case Builtin::BI__builtin_bzero: {
3552 Address Dest = EmitPointerWithAlignment(E->getArg(0));
3553 Value *SizeVal = EmitScalarExpr(E->getArg(1));
3554 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
3555 E->getArg(0)->getExprLoc(), FD, 0);
3556 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
3557 return RValue::get(nullptr);
3558 }
3559 case Builtin::BImemcpy:
3560 case Builtin::BI__builtin_memcpy:
3561 case Builtin::BImempcpy:
3562 case Builtin::BI__builtin_mempcpy: {
3563 Address Dest = EmitPointerWithAlignment(E->getArg(0));
3564 Address Src = EmitPointerWithAlignment(E->getArg(1));
3565 Value *SizeVal = EmitScalarExpr(E->getArg(2));
3566 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
3567 E->getArg(0)->getExprLoc(), FD, 0);
3568 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
3569 E->getArg(1)->getExprLoc(), FD, 1);
3570 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
3571 if (BuiltinID == Builtin::BImempcpy ||
3572 BuiltinID == Builtin::BI__builtin_mempcpy)
3573 return RValue::get(Builder.CreateInBoundsGEP(Dest.getElementType(),
3574 Dest.getPointer(), SizeVal));
3575 else
3576 return RValue::get(Dest.getPointer());
3577 }
3578
3579 case Builtin::BI__builtin_memcpy_inline: {
3580 Address Dest = EmitPointerWithAlignment(E->getArg(0));
3581 Address Src = EmitPointerWithAlignment(E->getArg(1));
3582 uint64_t Size =
3583 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
3584 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
3585 E->getArg(0)->getExprLoc(), FD, 0);
3586 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
3587 E->getArg(1)->getExprLoc(), FD, 1);
3588 Builder.CreateMemCpyInline(Dest, Src, Size);
3589 return RValue::get(nullptr);
3590 }
3591
3592 case Builtin::BI__builtin_char_memchr:
3593 BuiltinID = Builtin::BI__builtin_memchr;
3594 break;
3595
3596 case Builtin::BI__builtin___memcpy_chk: {
3597 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
3598 Expr::EvalResult SizeResult, DstSizeResult;
3599 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
3600 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
3601 break;
3602 llvm::APSInt Size = SizeResult.Val.getInt();
3603 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
3604 if (Size.ugt(DstSize))
3605 break;
3606 Address Dest = EmitPointerWithAlignment(E->getArg(0));
3607 Address Src = EmitPointerWithAlignment(E->getArg(1));
3608 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
3609 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
3610 return RValue::get(Dest.getPointer());
3611 }
3612
3613 case Builtin::BI__builtin_objc_memmove_collectable: {
3614 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
3615 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
3616 Value *SizeVal = EmitScalarExpr(E->getArg(2));
3617 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
3618 DestAddr, SrcAddr, SizeVal);
3619 return RValue::get(DestAddr.getPointer());
3620 }
3621
3622 case Builtin::BI__builtin___memmove_chk: {
3623 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
3624 Expr::EvalResult SizeResult, DstSizeResult;
3625 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
3626 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
3627 break;
3628 llvm::APSInt Size = SizeResult.Val.getInt();
3629 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
3630 if (Size.ugt(DstSize))
3631 break;
3632 Address Dest = EmitPointerWithAlignment(E->getArg(0));
3633 Address Src = EmitPointerWithAlignment(E->getArg(1));
3634 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
3635 Builder.CreateMemMove(Dest, Src, SizeVal, false);
3636 return RValue::get(Dest.getPointer());
3637 }
3638
3639 case Builtin::BImemmove:
3640 case Builtin::BI__builtin_memmove: {
3641 Address Dest = EmitPointerWithAlignment(E->getArg(0));
3642 Address Src = EmitPointerWithAlignment(E->getArg(1));
3643 Value *SizeVal = EmitScalarExpr(E->getArg(2));
3644 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
3645 E->getArg(0)->getExprLoc(), FD, 0);
3646 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
3647 E->getArg(1)->getExprLoc(), FD, 1);
3648 Builder.CreateMemMove(Dest, Src, SizeVal, false);
3649 return RValue::get(Dest.getPointer());
3650 }
3651 case Builtin::BImemset:
3652 case Builtin::BI__builtin_memset: {
3653 Address Dest = EmitPointerWithAlignment(E->getArg(0));
3654 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
3655 Builder.getInt8Ty());
3656 Value *SizeVal = EmitScalarExpr(E->getArg(2));
3657 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
3658 E->getArg(0)->getExprLoc(), FD, 0);
3659 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
3660 return RValue::get(Dest.getPointer());
3661 }
3662 case Builtin::BI__builtin_memset_inline: {
3663 Address Dest = EmitPointerWithAlignment(E->getArg(0));
3664 Value *ByteVal =
3665 Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty());
3666 uint64_t Size =
3667 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
3668 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
3669 E->getArg(0)->getExprLoc(), FD, 0);
3670 Builder.CreateMemSetInline(Dest, ByteVal, Size);
3671 return RValue::get(nullptr);
3672 }
3673 case Builtin::BI__builtin___memset_chk: {
3674 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
3675 Expr::EvalResult SizeResult, DstSizeResult;
3676 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
3677 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
3678 break;
3679 llvm::APSInt Size = SizeResult.Val.getInt();
3680 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
3681 if (Size.ugt(DstSize))
3682 break;
3683 Address Dest = EmitPointerWithAlignment(E->getArg(0));
3684 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
3685 Builder.getInt8Ty());
3686 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
3687 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
3688 return RValue::get(Dest.getPointer());
3689 }
3690 case Builtin::BI__builtin_wmemchr: {
3691 // The MSVC runtime library does not provide a definition of wmemchr, so we
3692 // need an inline implementation.
3693 if (!getTarget().getTriple().isOSMSVCRT())
3694 break;
3695
3696 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
3697 Value *Str = EmitScalarExpr(E->getArg(0));
3698 Value *Chr = EmitScalarExpr(E->getArg(1));
3699 Value *Size = EmitScalarExpr(E->getArg(2));
3700
3701 BasicBlock *Entry = Builder.GetInsertBlock();
3702 BasicBlock *CmpEq = createBasicBlock("wmemchr.eq");
3703 BasicBlock *Next = createBasicBlock("wmemchr.next");
3704 BasicBlock *Exit = createBasicBlock("wmemchr.exit");
3705 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
3706 Builder.CreateCondBr(SizeEq0, Exit, CmpEq);
3707
3708 EmitBlock(CmpEq);
3709 PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2);
3710 StrPhi->addIncoming(Str, Entry);
3711 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
3712 SizePhi->addIncoming(Size, Entry);
3713 CharUnits WCharAlign =
3714 getContext().getTypeAlignInChars(getContext().WCharTy);
3715 Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign);
3716 Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0);
3717 Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr);
3718 Builder.CreateCondBr(StrEqChr, Exit, Next);
3719
3720 EmitBlock(Next);
3721 Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1);
3722 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
3723 Value *NextSizeEq0 =
3724 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
3725 Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq);
3726 StrPhi->addIncoming(NextStr, Next);
3727 SizePhi->addIncoming(NextSize, Next);
3728
3729 EmitBlock(Exit);
3730 PHINode *Ret = Builder.CreatePHI(Str->getType(), 3);
3731 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry);
3732 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next);
3733 Ret->addIncoming(FoundChr, CmpEq);
3734 return RValue::get(Ret);
3735 }
3736 case Builtin::BI__builtin_wmemcmp: {
3737 // The MSVC runtime library does not provide a definition of wmemcmp, so we
3738 // need an inline implementation.
3739 if (!getTarget().getTriple().isOSMSVCRT())
3740 break;
3741
3742 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
3743
3744 Value *Dst = EmitScalarExpr(E->getArg(0));
3745 Value *Src = EmitScalarExpr(E->getArg(1));
3746 Value *Size = EmitScalarExpr(E->getArg(2));
3747
3748 BasicBlock *Entry = Builder.GetInsertBlock();
3749 BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
3750 BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
3751 BasicBlock *Next = createBasicBlock("wmemcmp.next");
3752 BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
3753 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
3754 Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
3755
3756 EmitBlock(CmpGT);
3757 PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
3758 DstPhi->addIncoming(Dst, Entry);
3759 PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
3760 SrcPhi->addIncoming(Src, Entry);
3761 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
3762 SizePhi->addIncoming(Size, Entry);
3763 CharUnits WCharAlign =
3764 getContext().getTypeAlignInChars(getContext().WCharTy);
3765 Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
3766 Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
3767 Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
3768 Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
3769
3770 EmitBlock(CmpLT);
3771 Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
3772 Builder.CreateCondBr(DstLtSrc, Exit, Next);
3773
3774 EmitBlock(Next);
3775 Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
3776 Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
3777 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
3778 Value *NextSizeEq0 =
3779 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
3780 Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
3781 DstPhi->addIncoming(NextDst, Next);
3782 SrcPhi->addIncoming(NextSrc, Next);
3783 SizePhi->addIncoming(NextSize, Next);
3784
3785 EmitBlock(Exit);
3786 PHINode *Ret = Builder.CreatePHI(IntTy, 4);
3787 Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
3788 Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
3789 Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
3790 Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
3791 return RValue::get(Ret);
3792 }
3793 case Builtin::BI__builtin_dwarf_cfa: {
3794 // The offset in bytes from the first argument to the CFA.
3795 //
3796 // Why on earth is this in the frontend? Is there any reason at
3797 // all that the backend can't reasonably determine this while
3798 // lowering llvm.eh.dwarf.cfa()?
3799 //
3800 // TODO: If there's a satisfactory reason, add a target hook for
3801 // this instead of hard-coding 0, which is correct for most targets.
3802 int32_t Offset = 0;
3803
3804 Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
3805 return RValue::get(Builder.CreateCall(F,
3806 llvm::ConstantInt::get(Int32Ty, Offset)));
3807 }
3808 case Builtin::BI__builtin_return_address: {
3809 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
3810 getContext().UnsignedIntTy);
3811 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
3812 return RValue::get(Builder.CreateCall(F, Depth));
3813 }
3814 case Builtin::BI_ReturnAddress: {
3815 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
3816 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
3817 }
3818 case Builtin::BI__builtin_frame_address: {
3819 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
3820 getContext().UnsignedIntTy);
3821 Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
3822 return RValue::get(Builder.CreateCall(F, Depth));
3823 }
3824 case Builtin::BI__builtin_extract_return_addr: {
3825 Value *Address = EmitScalarExpr(E->getArg(0));
3826 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
3827 return RValue::get(Result);
3828 }
3829 case Builtin::BI__builtin_frob_return_addr: {
3830 Value *Address = EmitScalarExpr(E->getArg(0));
3831 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
3832 return RValue::get(Result);
3833 }
3834 case Builtin::BI__builtin_dwarf_sp_column: {
3835 llvm::IntegerType *Ty
3836 = cast<llvm::IntegerType>(ConvertType(E->getType()));
3837 int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
3838 if (Column == -1) {
3839 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
3840 return RValue::get(llvm::UndefValue::get(Ty));
3841 }
3842 return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
3843 }
3844 case Builtin::BI__builtin_init_dwarf_reg_size_table: {
3845 Value *Address = EmitScalarExpr(E->getArg(0));
3846 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
3847 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
3848 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
3849 }
3850 case Builtin::BI__builtin_eh_return: {
3851 Value *Int = EmitScalarExpr(E->getArg(0));
3852 Value *Ptr = EmitScalarExpr(E->getArg(1));
3853
3854 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
3855 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
3856 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
3857 Function *F =
3858 CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
3859 : Intrinsic::eh_return_i64);
3860 Builder.CreateCall(F, {Int, Ptr});
3861 Builder.CreateUnreachable();
3862
3863 // We do need to preserve an insertion point.
3864 EmitBlock(createBasicBlock("builtin_eh_return.cont"));
3865
3866 return RValue::get(nullptr);
3867 }
3868 case Builtin::BI__builtin_unwind_init: {
3869 Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
3870 Builder.CreateCall(F);
3871 return RValue::get(nullptr);
3872 }
3873 case Builtin::BI__builtin_extend_pointer: {
3874 // Extends a pointer to the size of an _Unwind_Word, which is
3875 // uint64_t on all platforms. Generally this gets poked into a
3876 // register and eventually used as an address, so if the
3877 // addressing registers are wider than pointers and the platform
3878 // doesn't implicitly ignore high-order bits when doing
3879 // addressing, we need to make sure we zext / sext based on
3880 // the platform's expectations.
3881 //
3882 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
3883
3884 // Cast the pointer to intptr_t.
3885 Value *Ptr = EmitScalarExpr(E->getArg(0));
3886 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
3887
3888 // If that's 64 bits, we're done.
3889 if (IntPtrTy->getBitWidth() == 64)
3890 return RValue::get(Result);
3891
3892 // Otherwise, ask the codegen data what to do.
3893 if (getTargetHooks().extendPointerWithSExt())
3894 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
3895 else
3896 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
3897 }
3898 case Builtin::BI__builtin_setjmp: {
3899 // Buffer is a void**.
3900 Address Buf = EmitPointerWithAlignment(E->getArg(0));
3901
3902 // Store the frame pointer to the setjmp buffer.
3903 Value *FrameAddr = Builder.CreateCall(
3904 CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
3905 ConstantInt::get(Int32Ty, 0));
3906 Builder.CreateStore(FrameAddr, Buf);
3907
3908 // Store the stack pointer to the setjmp buffer.
3909 Value *StackAddr =
3910 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
3911 Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
3912 Builder.CreateStore(StackAddr, StackSaveSlot);
3913
3914 // Call LLVM's EH setjmp, which is lightweight.
3915 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
3916 return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
3917 }
3918 case Builtin::BI__builtin_longjmp: {
3919 Value *Buf = EmitScalarExpr(E->getArg(0));
3920 Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
3921
3922 // Call LLVM's EH longjmp, which is lightweight.
3923 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
3924
3925 // longjmp doesn't return; mark this as unreachable.
3926 Builder.CreateUnreachable();
3927
3928 // We do need to preserve an insertion point.
3929 EmitBlock(createBasicBlock("longjmp.cont"));
3930
3931 return RValue::get(nullptr);
3932 }
3933 case Builtin::BI__builtin_launder: {
3934 const Expr *Arg = E->getArg(0);
3935 QualType ArgTy = Arg->getType()->getPointeeType();
3936 Value *Ptr = EmitScalarExpr(Arg);
3937 if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
3938 Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
3939
3940 return RValue::get(Ptr);
3941 }
3942 case Builtin::BI__sync_fetch_and_add:
3943 case Builtin::BI__sync_fetch_and_sub:
3944 case Builtin::BI__sync_fetch_and_or:
3945 case Builtin::BI__sync_fetch_and_and:
3946 case Builtin::BI__sync_fetch_and_xor:
3947 case Builtin::BI__sync_fetch_and_nand:
3948 case Builtin::BI__sync_add_and_fetch:
3949 case Builtin::BI__sync_sub_and_fetch:
3950 case Builtin::BI__sync_and_and_fetch:
3951 case Builtin::BI__sync_or_and_fetch:
3952 case Builtin::BI__sync_xor_and_fetch:
3953 case Builtin::BI__sync_nand_and_fetch:
3954 case Builtin::BI__sync_val_compare_and_swap:
3955 case Builtin::BI__sync_bool_compare_and_swap:
3956 case Builtin::BI__sync_lock_test_and_set:
3957 case Builtin::BI__sync_lock_release:
3958 case Builtin::BI__sync_swap:
3959 llvm_unreachable("Shouldn't make it through sema");
3960 case Builtin::BI__sync_fetch_and_add_1:
3961 case Builtin::BI__sync_fetch_and_add_2:
3962 case Builtin::BI__sync_fetch_and_add_4:
3963 case Builtin::BI__sync_fetch_and_add_8:
3964 case Builtin::BI__sync_fetch_and_add_16:
3965 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
3966 case Builtin::BI__sync_fetch_and_sub_1:
3967 case Builtin::BI__sync_fetch_and_sub_2:
3968 case Builtin::BI__sync_fetch_and_sub_4:
3969 case Builtin::BI__sync_fetch_and_sub_8:
3970 case Builtin::BI__sync_fetch_and_sub_16:
3971 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
3972 case Builtin::BI__sync_fetch_and_or_1:
3973 case Builtin::BI__sync_fetch_and_or_2:
3974 case Builtin::BI__sync_fetch_and_or_4:
3975 case Builtin::BI__sync_fetch_and_or_8:
3976 case Builtin::BI__sync_fetch_and_or_16:
3977 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
3978 case Builtin::BI__sync_fetch_and_and_1:
3979 case Builtin::BI__sync_fetch_and_and_2:
3980 case Builtin::BI__sync_fetch_and_and_4:
3981 case Builtin::BI__sync_fetch_and_and_8:
3982 case Builtin::BI__sync_fetch_and_and_16:
3983 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
3984 case Builtin::BI__sync_fetch_and_xor_1:
3985 case Builtin::BI__sync_fetch_and_xor_2:
3986 case Builtin::BI__sync_fetch_and_xor_4:
3987 case Builtin::BI__sync_fetch_and_xor_8:
3988 case Builtin::BI__sync_fetch_and_xor_16:
3989 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
3990 case Builtin::BI__sync_fetch_and_nand_1:
3991 case Builtin::BI__sync_fetch_and_nand_2:
3992 case Builtin::BI__sync_fetch_and_nand_4:
3993 case Builtin::BI__sync_fetch_and_nand_8:
3994 case Builtin::BI__sync_fetch_and_nand_16:
3995 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
3996
3997 // Clang extensions: not overloaded yet.
3998 case Builtin::BI__sync_fetch_and_min:
3999 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
4000 case Builtin::BI__sync_fetch_and_max:
4001 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
4002 case Builtin::BI__sync_fetch_and_umin:
4003 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
4004 case Builtin::BI__sync_fetch_and_umax:
4005 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
4006
4007 case Builtin::BI__sync_add_and_fetch_1:
4008 case Builtin::BI__sync_add_and_fetch_2:
4009 case Builtin::BI__sync_add_and_fetch_4:
4010 case Builtin::BI__sync_add_and_fetch_8:
4011 case Builtin::BI__sync_add_and_fetch_16:
4012 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
4013 llvm::Instruction::Add);
4014 case Builtin::BI__sync_sub_and_fetch_1:
4015 case Builtin::BI__sync_sub_and_fetch_2:
4016 case Builtin::BI__sync_sub_and_fetch_4:
4017 case Builtin::BI__sync_sub_and_fetch_8:
4018 case Builtin::BI__sync_sub_and_fetch_16:
4019 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
4020 llvm::Instruction::Sub);
4021 case Builtin::BI__sync_and_and_fetch_1:
4022 case Builtin::BI__sync_and_and_fetch_2:
4023 case Builtin::BI__sync_and_and_fetch_4:
4024 case Builtin::BI__sync_and_and_fetch_8:
4025 case Builtin::BI__sync_and_and_fetch_16:
4026 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
4027 llvm::Instruction::And);
4028 case Builtin::BI__sync_or_and_fetch_1:
4029 case Builtin::BI__sync_or_and_fetch_2:
4030 case Builtin::BI__sync_or_and_fetch_4:
4031 case Builtin::BI__sync_or_and_fetch_8:
4032 case Builtin::BI__sync_or_and_fetch_16:
4033 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
4034 llvm::Instruction::Or);
4035 case Builtin::BI__sync_xor_and_fetch_1:
4036 case Builtin::BI__sync_xor_and_fetch_2:
4037 case Builtin::BI__sync_xor_and_fetch_4:
4038 case Builtin::BI__sync_xor_and_fetch_8:
4039 case Builtin::BI__sync_xor_and_fetch_16:
4040 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
4041 llvm::Instruction::Xor);
4042 case Builtin::BI__sync_nand_and_fetch_1:
4043 case Builtin::BI__sync_nand_and_fetch_2:
4044 case Builtin::BI__sync_nand_and_fetch_4:
4045 case Builtin::BI__sync_nand_and_fetch_8:
4046 case Builtin::BI__sync_nand_and_fetch_16:
4047 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
4048 llvm::Instruction::And, true);
4049
4050 case Builtin::BI__sync_val_compare_and_swap_1:
4051 case Builtin::BI__sync_val_compare_and_swap_2:
4052 case Builtin::BI__sync_val_compare_and_swap_4:
4053 case Builtin::BI__sync_val_compare_and_swap_8:
4054 case Builtin::BI__sync_val_compare_and_swap_16:
4055 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
4056
4057 case Builtin::BI__sync_bool_compare_and_swap_1:
4058 case Builtin::BI__sync_bool_compare_and_swap_2:
4059 case Builtin::BI__sync_bool_compare_and_swap_4:
4060 case Builtin::BI__sync_bool_compare_and_swap_8:
4061 case Builtin::BI__sync_bool_compare_and_swap_16:
4062 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
4063
4064 case Builtin::BI__sync_swap_1:
4065 case Builtin::BI__sync_swap_2:
4066 case Builtin::BI__sync_swap_4:
4067 case Builtin::BI__sync_swap_8:
4068 case Builtin::BI__sync_swap_16:
4069 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
4070
4071 case Builtin::BI__sync_lock_test_and_set_1:
4072 case Builtin::BI__sync_lock_test_and_set_2:
4073 case Builtin::BI__sync_lock_test_and_set_4:
4074 case Builtin::BI__sync_lock_test_and_set_8:
4075 case Builtin::BI__sync_lock_test_and_set_16:
4076 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
4077
4078 case Builtin::BI__sync_lock_release_1:
4079 case Builtin::BI__sync_lock_release_2:
4080 case Builtin::BI__sync_lock_release_4:
4081 case Builtin::BI__sync_lock_release_8:
4082 case Builtin::BI__sync_lock_release_16: {
4083 Value *Ptr = CheckAtomicAlignment(*this, E);
4084 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4085 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
4086 llvm::Type *ITy =
4087 llvm::IntegerType::get(getLLVMContext(), StoreSize.getQuantity() * 8);
4088 llvm::StoreInst *Store =
4089 Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
4090 StoreSize);
4091 Store->setAtomic(llvm::AtomicOrdering::Release);
4092 return RValue::get(nullptr);
4093 }
4094
4095 case Builtin::BI__sync_synchronize: {
4096 // We assume this is supposed to correspond to a C++0x-style
4097 // sequentially-consistent fence (i.e. this is only usable for
4098 // synchronization, not device I/O or anything like that). This intrinsic
4099 // is really badly designed in the sense that in theory, there isn't
4100 // any way to safely use it... but in practice, it mostly works
4101 // to use it with non-atomic loads and stores to get acquire/release
4102 // semantics.
4103 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
4104 return RValue::get(nullptr);
4105 }
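// For illustration: __sync_synchronize() is therefore simply
//   fence seq_cst
// while the signal-fence builtins further below restrict the fence to
// syncscope("singlethread").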
4106
4107 case Builtin::BI__builtin_nontemporal_load:
4108 return RValue::get(EmitNontemporalLoad(*this, E));
4109 case Builtin::BI__builtin_nontemporal_store:
4110 return RValue::get(EmitNontemporalStore(*this, E));
4111 case Builtin::BI__c11_atomic_is_lock_free:
4112 case Builtin::BI__atomic_is_lock_free: {
4113 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
4114 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
4115 // _Atomic(T) is always properly-aligned.
4116 const char *LibCallName = "__atomic_is_lock_free";
4117 CallArgList Args;
4118 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
4119 getContext().getSizeType());
4120 if (BuiltinID == Builtin::BI__atomic_is_lock_free)
4121 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
4122 getContext().VoidPtrTy);
4123 else
4124 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
4125 getContext().VoidPtrTy);
4126 const CGFunctionInfo &FuncInfo =
4127 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
4128 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
4129 llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
4130 return EmitCall(FuncInfo, CGCallee::forDirect(Func),
4131 ReturnValueSlot(), Args);
4132 }
4133
4134 case Builtin::BI__atomic_test_and_set: {
4135 // Look at the argument type to determine whether this is a volatile
4136 // operation. The parameter type is always volatile.
4137 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
4138 bool Volatile =
4139 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
4140
4141 Value *Ptr = EmitScalarExpr(E->getArg(0));
4142 Value *NewVal = Builder.getInt8(1);
4143 Value *Order = EmitScalarExpr(E->getArg(1));
4144 if (isa<llvm::ConstantInt>(Order)) {
4145 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4146 AtomicRMWInst *Result = nullptr;
4147 switch (ord) {
4148 case 0: // memory_order_relaxed
4149 default: // invalid order
4150 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4151 llvm::AtomicOrdering::Monotonic);
4152 break;
4153 case 1: // memory_order_consume
4154 case 2: // memory_order_acquire
4155 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4156 llvm::AtomicOrdering::Acquire);
4157 break;
4158 case 3: // memory_order_release
4159 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4160 llvm::AtomicOrdering::Release);
4161 break;
4162 case 4: // memory_order_acq_rel
4163
4164 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4165 llvm::AtomicOrdering::AcquireRelease);
4166 break;
4167 case 5: // memory_order_seq_cst
4168 Result = Builder.CreateAtomicRMW(
4169 llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4170 llvm::AtomicOrdering::SequentiallyConsistent);
4171 break;
4172 }
4173 Result->setVolatile(Volatile);
4174 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
4175 }
4176
4177 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4178
4179 llvm::BasicBlock *BBs[5] = {
4180 createBasicBlock("monotonic", CurFn),
4181 createBasicBlock("acquire", CurFn),
4182 createBasicBlock("release", CurFn),
4183 createBasicBlock("acqrel", CurFn),
4184 createBasicBlock("seqcst", CurFn)
4185 };
4186 llvm::AtomicOrdering Orders[5] = {
4187 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
4188 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
4189 llvm::AtomicOrdering::SequentiallyConsistent};
4190
4191 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4192 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
4193
4194 Builder.SetInsertPoint(ContBB);
4195 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
4196
4197 for (unsigned i = 0; i < 5; ++i) {
4198 Builder.SetInsertPoint(BBs[i]);
4199 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
4200 Ptr, NewVal, Orders[i]);
4201 RMW->setVolatile(Volatile);
4202 Result->addIncoming(RMW, BBs[i]);
4203 Builder.CreateBr(ContBB);
4204 }
4205
4206 SI->addCase(Builder.getInt32(0), BBs[0]);
4207 SI->addCase(Builder.getInt32(1), BBs[1]);
4208 SI->addCase(Builder.getInt32(2), BBs[1]);
4209 SI->addCase(Builder.getInt32(3), BBs[2]);
4210 SI->addCase(Builder.getInt32(4), BBs[3]);
4211 SI->addCase(Builder.getInt32(5), BBs[4]);
4212
4213 Builder.SetInsertPoint(ContBB);
4214 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
4215 }
4216
4217 case Builtin::BI__atomic_clear: {
4218 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
4219 bool Volatile =
4220 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
4221
4222 Address Ptr = EmitPointerWithAlignment(E->getArg(0));
4223 Ptr = Ptr.withElementType(Int8Ty);
4224 Value *NewVal = Builder.getInt8(0);
4225 Value *Order = EmitScalarExpr(E->getArg(1));
4226 if (isa<llvm::ConstantInt>(Order)) {
4227 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4228 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
4229 switch (ord) {
4230 case 0: // memory_order_relaxed
4231 default: // invalid order
4232 Store->setOrdering(llvm::AtomicOrdering::Monotonic);
4233 break;
4234 case 3: // memory_order_release
4235 Store->setOrdering(llvm::AtomicOrdering::Release);
4236 break;
4237 case 5: // memory_order_seq_cst
4238 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
4239 break;
4240 }
4241 return RValue::get(nullptr);
4242 }
4243
4244 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4245
4246 llvm::BasicBlock *BBs[3] = {
4247 createBasicBlock("monotonic", CurFn),
4248 createBasicBlock("release", CurFn),
4249 createBasicBlock("seqcst", CurFn)
4250 };
4251 llvm::AtomicOrdering Orders[3] = {
4252 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
4253 llvm::AtomicOrdering::SequentiallyConsistent};
4254
4255 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4256 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
4257
4258 for (unsigned i = 0; i < 3; ++i) {
4259 Builder.SetInsertPoint(BBs[i]);
4260 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
4261 Store->setOrdering(Orders[i]);
4262 Builder.CreateBr(ContBB);
4263 }
4264
4265 SI->addCase(Builder.getInt32(0), BBs[0]);
4266 SI->addCase(Builder.getInt32(3), BBs[1]);
4267 SI->addCase(Builder.getInt32(5), BBs[2]);
4268
4269 Builder.SetInsertPoint(ContBB);
4270 return RValue::get(nullptr);
4271 }
4272
4273 case Builtin::BI__atomic_thread_fence:
4274 case Builtin::BI__atomic_signal_fence:
4275 case Builtin::BI__c11_atomic_thread_fence:
4276 case Builtin::BI__c11_atomic_signal_fence: {
4277 llvm::SyncScope::ID SSID;
4278 if (BuiltinID == Builtin::BI__atomic_signal_fence ||
4279 BuiltinID == Builtin::BI__c11_atomic_signal_fence)
4280 SSID = llvm::SyncScope::SingleThread;
4281 else
4282 SSID = llvm::SyncScope::System;
4283 Value *Order = EmitScalarExpr(E->getArg(0));
4284 if (isa<llvm::ConstantInt>(Order)) {
4285 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4286 switch (ord) {
4287 case 0: // memory_order_relaxed
4288 default: // invalid order
4289 break;
4290 case 1: // memory_order_consume
4291 case 2: // memory_order_acquire
4292 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
4293 break;
4294 case 3: // memory_order_release
4295 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
4296 break;
4297 case 4: // memory_order_acq_rel
4298 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
4299 break;
4300 case 5: // memory_order_seq_cst
4301 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
4302 break;
4303 }
4304 return RValue::get(nullptr);
4305 }
4306
4307 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
4308 AcquireBB = createBasicBlock("acquire", CurFn);
4309 ReleaseBB = createBasicBlock("release", CurFn);
4310 AcqRelBB = createBasicBlock("acqrel", CurFn);
4311 SeqCstBB = createBasicBlock("seqcst", CurFn);
4312 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4313
4314 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4315 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
4316
4317 Builder.SetInsertPoint(AcquireBB);
4318 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
4319 Builder.CreateBr(ContBB);
4320 SI->addCase(Builder.getInt32(1), AcquireBB);
4321 SI->addCase(Builder.getInt32(2), AcquireBB);
4322
4323 Builder.SetInsertPoint(ReleaseBB);
4324 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
4325 Builder.CreateBr(ContBB);
4326 SI->addCase(Builder.getInt32(3), ReleaseBB);
4327
4328 Builder.SetInsertPoint(AcqRelBB);
4329 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
4330 Builder.CreateBr(ContBB);
4331 SI->addCase(Builder.getInt32(4), AcqRelBB);
4332
4333 Builder.SetInsertPoint(SeqCstBB);
4334 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
4335 Builder.CreateBr(ContBB);
4336 SI->addCase(Builder.getInt32(5), SeqCstBB);
4337
4338 Builder.SetInsertPoint(ContBB);
4339 return RValue::get(nullptr);
4340 }
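// For illustration: with a constant order argument,
//   __atomic_thread_fence(__ATOMIC_ACQUIRE)
// lowers to
//   fence acquire
// and __atomic_signal_fence(__ATOMIC_SEQ_CST) to
//   fence syncscope("singlethread") seq_cst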
4341
4342 case Builtin::BI__builtin_signbit:
4343 case Builtin::BI__builtin_signbitf:
4344 case Builtin::BI__builtin_signbitl: {
4345 return RValue::get(
4346 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
4347 ConvertType(E->getType())));
4348 }
4349 case Builtin::BI__warn_memset_zero_len:
4350 return RValue::getIgnored();
4351 case Builtin::BI__annotation: {
4352 // Re-encode each wide string to UTF8 and make an MDString.
4353 SmallVector<Metadata *, 1> Strings;
4354 for (const Expr *Arg : E->arguments()) {
4355 const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
4356 assert(Str->getCharByteWidth() == 2);
4357 StringRef WideBytes = Str->getBytes();
4358 std::string StrUtf8;
4359 if (!convertUTF16ToUTF8String(
4360 ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
4361 CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
4362 continue;
4363 }
4364 Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
4365 }
4366
4367 // Build an MDTuple of MDStrings and emit the intrinsic call.
4368 llvm::Function *F =
4369 CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
4370 MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
4371 Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
4372 return RValue::getIgnored();
4373 }
4374 case Builtin::BI__builtin_annotation: {
4375 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
4376 llvm::Function *F =
4377 CGM.getIntrinsic(llvm::Intrinsic::annotation,
4378 {AnnVal->getType(), CGM.ConstGlobalsPtrTy});
4379
4380 // Get the annotation string, go through casts. Sema requires this to be a
4381 // non-wide string literal, potentially cast, so the cast<> is safe.
4382 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
4383 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
4384 return RValue::get(
4385 EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr));
4386 }
4387 case Builtin::BI__builtin_addcb:
4388 case Builtin::BI__builtin_addcs:
4389 case Builtin::BI__builtin_addc:
4390 case Builtin::BI__builtin_addcl:
4391 case Builtin::BI__builtin_addcll:
4392 case Builtin::BI__builtin_subcb:
4393 case Builtin::BI__builtin_subcs:
4394 case Builtin::BI__builtin_subc:
4395 case Builtin::BI__builtin_subcl:
4396 case Builtin::BI__builtin_subcll: {
4397
4398 // We translate all of these builtins from expressions of the form:
4399 // int x = ..., y = ..., carryin = ..., carryout, result;
4400 // result = __builtin_addc(x, y, carryin, &carryout);
4401 //
4402 // to LLVM IR of the form:
4403 //
4404 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
4405 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
4406 // %carry1 = extractvalue {i32, i1} %tmp1, 1
4407 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
4408 // i32 %carryin)
4409 // %result = extractvalue {i32, i1} %tmp2, 0
4410 // %carry2 = extractvalue {i32, i1} %tmp2, 1
4411 // %tmp3 = or i1 %carry1, %carry2
4412 // %tmp4 = zext i1 %tmp3 to i32
4413 // store i32 %tmp4, i32* %carryout
4414
4415 // Scalarize our inputs.
4416 llvm::Value *X = EmitScalarExpr(E->getArg(0));
4417 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
4418 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
4419 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
4420
4421 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
4422 llvm::Intrinsic::ID IntrinsicId;
4423 switch (BuiltinID) {
4424 default: llvm_unreachable("Unknown multiprecision builtin id.");
4425 case Builtin::BI__builtin_addcb:
4426 case Builtin::BI__builtin_addcs:
4427 case Builtin::BI__builtin_addc:
4428 case Builtin::BI__builtin_addcl:
4429 case Builtin::BI__builtin_addcll:
4430 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
4431 break;
4432 case Builtin::BI__builtin_subcb:
4433 case Builtin::BI__builtin_subcs:
4434 case Builtin::BI__builtin_subc:
4435 case Builtin::BI__builtin_subcl:
4436 case Builtin::BI__builtin_subcll:
4437 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
4438 break;
4439 }
4440
4441 // Construct our resulting LLVM IR expression.
4442 llvm::Value *Carry1;
4443 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
4444 X, Y, Carry1);
4445 llvm::Value *Carry2;
4446 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
4447 Sum1, Carryin, Carry2);
4448 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
4449 X->getType());
4450 Builder.CreateStore(CarryOut, CarryOutPtr);
4451 return RValue::get(Sum2);
4452 }
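// Illustrative usage (a sketch): chaining the carry-out of one limb into the
// carry-in of the next to add two double-word integers:
//
//   unsigned lo, hi, c;
//   lo = __builtin_addc(a_lo, b_lo, 0, &c);
//   hi = __builtin_addc(a_hi, b_hi, c, &c);
//
// Each call expands to the pair of uadd.with.overflow calls shown in the
// comment above, with the two carries OR'd into the carry-out.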
4453
4454 case Builtin::BI__builtin_add_overflow:
4455 case Builtin::BI__builtin_sub_overflow:
4456 case Builtin::BI__builtin_mul_overflow: {
4457 const clang::Expr *LeftArg = E->getArg(0);
4458 const clang::Expr *RightArg = E->getArg(1);
4459 const clang::Expr *ResultArg = E->getArg(2);
4460
4461 clang::QualType ResultQTy =
4462 ResultArg->getType()->castAs<PointerType>()->getPointeeType();
4463
4464 WidthAndSignedness LeftInfo =
4465 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
4466 WidthAndSignedness RightInfo =
4467 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
4468 WidthAndSignedness ResultInfo =
4469 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
4470
4471 // Handle mixed-sign multiplication as a special case, because adding
4472 // runtime or backend support for our generic irgen would be too expensive.
4473 if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
4474 return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
4475 RightInfo, ResultArg, ResultQTy,
4476 ResultInfo);
4477
4478 if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo,
4479 ResultInfo))
4480 return EmitCheckedUnsignedMultiplySignedResult(
4481 *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,
4482 ResultInfo);
4483
4484 WidthAndSignedness EncompassingInfo =
4485 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
4486
4487 llvm::Type *EncompassingLLVMTy =
4488 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
4489
4490 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
4491
4492 llvm::Intrinsic::ID IntrinsicId;
4493 switch (BuiltinID) {
4494 default:
4495 llvm_unreachable("Unknown overflow builtin id.");
4496 case Builtin::BI__builtin_add_overflow:
4497 IntrinsicId = EncompassingInfo.Signed
4498 ? llvm::Intrinsic::sadd_with_overflow
4499 : llvm::Intrinsic::uadd_with_overflow;
4500 break;
4501 case Builtin::BI__builtin_sub_overflow:
4502 IntrinsicId = EncompassingInfo.Signed
4503 ? llvm::Intrinsic::ssub_with_overflow
4504 : llvm::Intrinsic::usub_with_overflow;
4505 break;
4506 case Builtin::BI__builtin_mul_overflow:
4507 IntrinsicId = EncompassingInfo.Signed
4508 ? llvm::Intrinsic::smul_with_overflow
4509 : llvm::Intrinsic::umul_with_overflow;
4510 break;
4511 }
4512
4513 llvm::Value *Left = EmitScalarExpr(LeftArg);
4514 llvm::Value *Right = EmitScalarExpr(RightArg);
4515 Address ResultPtr = EmitPointerWithAlignment(ResultArg);
4516
4517 // Extend each operand to the encompassing type.
4518 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
4519 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
4520
4521 // Perform the operation on the extended values.
4522 llvm::Value *Overflow, *Result;
4523 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
4524
4525 if (EncompassingInfo.Width > ResultInfo.Width) {
4526 // The encompassing type is wider than the result type, so we need to
4527 // truncate it.
4528 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
4529
4530 // To see if the truncation caused an overflow, we will extend
4531 // the result and then compare it to the original result.
4532 llvm::Value *ResultTruncExt = Builder.CreateIntCast(
4533 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
4534 llvm::Value *TruncationOverflow =
4535 Builder.CreateICmpNE(Result, ResultTruncExt);
4536
4537 Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
4538 Result = ResultTrunc;
4539 }
4540
4541 // Finally, store the result using the pointer.
4542 bool isVolatile =
4543 ResultArg->getType()->getPointeeType().isVolatileQualified();
4544 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
4545
4546 return RValue::get(Overflow);
4547 }
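// Illustrative usage (a sketch): the generic form accepts operands and a
// result pointer of different integer types; the arithmetic happens in the
// encompassing type computed above, and the returned flag also covers the
// final truncation to the result type:
//
//   bool wraps(unsigned a, long b, short *out) {
//     return __builtin_add_overflow(a, b, out);
//   }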
4548
4549 case Builtin::BI__builtin_uadd_overflow:
4550 case Builtin::BI__builtin_uaddl_overflow:
4551 case Builtin::BI__builtin_uaddll_overflow:
4552 case Builtin::BI__builtin_usub_overflow:
4553 case Builtin::BI__builtin_usubl_overflow:
4554 case Builtin::BI__builtin_usubll_overflow:
4555 case Builtin::BI__builtin_umul_overflow:
4556 case Builtin::BI__builtin_umull_overflow:
4557 case Builtin::BI__builtin_umulll_overflow:
4558 case Builtin::BI__builtin_sadd_overflow:
4559 case Builtin::BI__builtin_saddl_overflow:
4560 case Builtin::BI__builtin_saddll_overflow:
4561 case Builtin::BI__builtin_ssub_overflow:
4562 case Builtin::BI__builtin_ssubl_overflow:
4563 case Builtin::BI__builtin_ssubll_overflow:
4564 case Builtin::BI__builtin_smul_overflow:
4565 case Builtin::BI__builtin_smull_overflow:
4566 case Builtin::BI__builtin_smulll_overflow: {
4567
4568 // We translate all of these builtins directly to the relevant llvm IR node.
4569
4570 // Scalarize our inputs.
4571 llvm::Value *X = EmitScalarExpr(E->getArg(0));
4572 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
4573 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
4574
4575 // Decide which of the overflow intrinsics we are lowering to:
4576 llvm::Intrinsic::ID IntrinsicId;
4577 switch (BuiltinID) {
4578 default: llvm_unreachable("Unknown overflow builtin id.");
4579 case Builtin::BI__builtin_uadd_overflow:
4580 case Builtin::BI__builtin_uaddl_overflow:
4581 case Builtin::BI__builtin_uaddll_overflow:
4582 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
4583 break;
4584 case Builtin::BI__builtin_usub_overflow:
4585 case Builtin::BI__builtin_usubl_overflow:
4586 case Builtin::BI__builtin_usubll_overflow:
4587 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
4588 break;
4589 case Builtin::BI__builtin_umul_overflow:
4590 case Builtin::BI__builtin_umull_overflow:
4591 case Builtin::BI__builtin_umulll_overflow:
4592 IntrinsicId = llvm::Intrinsic::umul_with_overflow;
4593 break;
4594 case Builtin::BI__builtin_sadd_overflow:
4595 case Builtin::BI__builtin_saddl_overflow:
4596 case Builtin::BI__builtin_saddll_overflow:
4597 IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
4598 break;
4599 case Builtin::BI__builtin_ssub_overflow:
4600 case Builtin::BI__builtin_ssubl_overflow:
4601 case Builtin::BI__builtin_ssubll_overflow:
4602 IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
4603 break;
4604 case Builtin::BI__builtin_smul_overflow:
4605 case Builtin::BI__builtin_smull_overflow:
4606 case Builtin::BI__builtin_smulll_overflow:
4607 IntrinsicId = llvm::Intrinsic::smul_with_overflow;
4608 break;
4609 }
4610
4611
4612 llvm::Value *Carry;
4613 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
4614 Builder.CreateStore(Sum, SumOutPtr);
4615
4616 return RValue::get(Carry);
4617 }
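// Illustrative usage (a sketch): unlike the generic __builtin_*_overflow
// forms above, these variants fix the operand type in the builtin name, so
// no encompassing-type widening or truncation check is needed:
//
//   unsigned r;
//   if (__builtin_uadd_overflow(a, b, &r)) { /* wrapped */ }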
4618 case Builtin::BIaddressof:
4619 case Builtin::BI__addressof:
4620 case Builtin::BI__builtin_addressof:
4621 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
4622 case Builtin::BI__builtin_function_start:
4623 return RValue::get(CGM.GetFunctionStart(
4624 E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext())));
4625 case Builtin::BI__builtin_operator_new:
4626 return EmitBuiltinNewDeleteCall(
4627 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
4628 case Builtin::BI__builtin_operator_delete:
4629 EmitBuiltinNewDeleteCall(
4630 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
4631 return RValue::get(nullptr);
4632
4633 case Builtin::BI__builtin_is_aligned:
4634 return EmitBuiltinIsAligned(E);
4635 case Builtin::BI__builtin_align_up:
4636 return EmitBuiltinAlignTo(E, true);
4637 case Builtin::BI__builtin_align_down:
4638 return EmitBuiltinAlignTo(E, false);
4639
4640 case Builtin::BI__noop:
4641 // __noop always evaluates to an integer literal zero.
4642 return RValue::get(ConstantInt::get(IntTy, 0));
4643 case Builtin::BI__builtin_call_with_static_chain: {
4644 const CallExpr *Call = cast<CallExpr>(E->getArg(0));
4645 const Expr *Chain = E->getArg(1);
4646 return EmitCall(Call->getCallee()->getType(),
4647 EmitCallee(Call->getCallee()), Call, ReturnValue,
4648 EmitScalarExpr(Chain));
4649 }
4650 case Builtin::BI_InterlockedExchange8:
4651 case Builtin::BI_InterlockedExchange16:
4652 case Builtin::BI_InterlockedExchange:
4653 case Builtin::BI_InterlockedExchangePointer:
4654 return RValue::get(
4655 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
4656 case Builtin::BI_InterlockedCompareExchangePointer:
4657 case Builtin::BI_InterlockedCompareExchangePointer_nf: {
4658 llvm::Type *RTy;
4659 llvm::IntegerType *IntType = IntegerType::get(
4661
4662 llvm::Value *Destination = EmitScalarExpr(E->getArg(0));
4663
4664 llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
4665 RTy = Exchange->getType();
4666 Exchange = Builder.CreatePtrToInt(Exchange, IntType);
4667
4668 llvm::Value *Comparand =
4669 Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
4670
4671 auto Ordering =
4672 BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
4673 AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;
4674
4675 auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
4676 Ordering, Ordering);
4677 Result->setVolatile(true);
4678
4679 return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
4680 0),
4681 RTy));
4682 }
4683 case Builtin::BI_InterlockedCompareExchange8:
4684 case Builtin::BI_InterlockedCompareExchange16:
4685 case Builtin::BI_InterlockedCompareExchange:
4686 case Builtin::BI_InterlockedCompareExchange64:
4687 return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
4688 case Builtin::BI_InterlockedIncrement16:
4689 case Builtin::BI_InterlockedIncrement:
4690 return RValue::get(
4691 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
4692 case Builtin::BI_InterlockedDecrement16:
4693 case Builtin::BI_InterlockedDecrement:
4694 return RValue::get(
4695 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
4696 case Builtin::BI_InterlockedAnd8:
4697 case Builtin::BI_InterlockedAnd16:
4698 case Builtin::BI_InterlockedAnd:
4699 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
4700 case Builtin::BI_InterlockedExchangeAdd8:
4701 case Builtin::BI_InterlockedExchangeAdd16:
4702 case Builtin::BI_InterlockedExchangeAdd:
4703 return RValue::get(
4704 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
4705 case Builtin::BI_InterlockedExchangeSub8:
4706 case Builtin::BI_InterlockedExchangeSub16:
4707 case Builtin::BI_InterlockedExchangeSub:
4708 return RValue::get(
4709 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
4710 case Builtin::BI_InterlockedOr8:
4711 case Builtin::BI_InterlockedOr16:
4712 case Builtin::BI_InterlockedOr:
4713 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
4714 case Builtin::BI_InterlockedXor8:
4715 case Builtin::BI_InterlockedXor16:
4716 case Builtin::BI_InterlockedXor:
4717 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
4718
4719 case Builtin::BI_bittest64:
4720 case Builtin::BI_bittest:
4721 case Builtin::BI_bittestandcomplement64:
4722 case Builtin::BI_bittestandcomplement:
4723 case Builtin::BI_bittestandreset64:
4724 case Builtin::BI_bittestandreset:
4725 case Builtin::BI_bittestandset64:
4726 case Builtin::BI_bittestandset:
4727 case Builtin::BI_interlockedbittestandreset:
4728 case Builtin::BI_interlockedbittestandreset64:
4729 case Builtin::BI_interlockedbittestandset64:
4730 case Builtin::BI_interlockedbittestandset:
4731 case Builtin::BI_interlockedbittestandset_acq:
4732 case Builtin::BI_interlockedbittestandset_rel:
4733 case Builtin::BI_interlockedbittestandset_nf:
4734 case Builtin::BI_interlockedbittestandreset_acq:
4735 case Builtin::BI_interlockedbittestandreset_rel:
4736 case Builtin::BI_interlockedbittestandreset_nf:
4737 return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
4738
4739 // These builtins exist to emit regular volatile loads and stores not
4740 // affected by the -fms-volatile setting.
4741 case Builtin::BI__iso_volatile_load8:
4742 case Builtin::BI__iso_volatile_load16:
4743 case Builtin::BI__iso_volatile_load32:
4744 case Builtin::BI__iso_volatile_load64:
4745 return RValue::get(EmitISOVolatileLoad(*this, E));
4746 case Builtin::BI__iso_volatile_store8:
4747 case Builtin::BI__iso_volatile_store16:
4748 case Builtin::BI__iso_volatile_store32:
4749 case Builtin::BI__iso_volatile_store64:
4750 return RValue::get(EmitISOVolatileStore(*this, E));
4751
4752 case Builtin::BI__exception_code:
4753 case Builtin::BI_exception_code:
4754 return RValue::get(EmitSEHExceptionCode());
4755 case Builtin::BI__exception_info:
4756 case Builtin::BI_exception_info:
4757 return RValue::get(EmitSEHExceptionInfo());
4758 case Builtin::BI__abnormal_termination:
4759 case Builtin::BI_abnormal_termination:
4760 return RValue::get(EmitSEHAbnormalTermination());
4761 case Builtin::BI_setjmpex:
4762 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
4763 E->getArg(0)->getType()->isPointerType())
4764 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
4765 break;
4766 case Builtin::BI_setjmp:
4767 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
4768 E->getArg(0)->getType()->isPointerType()) {
4769 if (getTarget().getTriple().getArch() == llvm::Triple::x86)
4770 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
4771 else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
4772 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
4773 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
4774 }
4775 break;
4776
4777 // C++ std:: builtins.
4778 case Builtin::BImove:
4779 case Builtin::BImove_if_noexcept:
4780 case Builtin::BIforward:
4781 case Builtin::BIforward_like:
4782 case Builtin::BIas_const:
4783 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
4784 case Builtin::BI__GetExceptionInfo: {
4785 if (llvm::GlobalVariable *GV =
4786 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
4787 return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
4788 break;
4789 }
4790
4791 case Builtin::BI__fastfail:
4792 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
4793
4794 case Builtin::BI__builtin_coro_id:
4795 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
4796 case Builtin::BI__builtin_coro_promise:
4797 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
4798 case Builtin::BI__builtin_coro_resume:
4799 EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
4800 return RValue::get(nullptr);
4801 case Builtin::BI__builtin_coro_frame:
4802 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
4803 case Builtin::BI__builtin_coro_noop:
4804 return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
4805 case Builtin::BI__builtin_coro_free:
4806 return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
4807 case Builtin::BI__builtin_coro_destroy:
4808 EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
4809 return RValue::get(nullptr);
4810 case Builtin::BI__builtin_coro_done:
4811 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
4812 case Builtin::BI__builtin_coro_alloc:
4813 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
4814 case Builtin::BI__builtin_coro_begin:
4815 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
4816 case Builtin::BI__builtin_coro_end:
4817 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
4818 case Builtin::BI__builtin_coro_suspend:
4819 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
4820 case Builtin::BI__builtin_coro_size:
4821 return EmitCoroutineIntrinsic(E, Intrinsic::coro_size);
4822 case Builtin::BI__builtin_coro_align:
4823 return EmitCoroutineIntrinsic(E, Intrinsic::coro_align);
4824
4825 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
4826 case Builtin::BIread_pipe:
4827 case Builtin::BIwrite_pipe: {
4828 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
4829 *Arg1 = EmitScalarExpr(E->getArg(1));
4830 CGOpenCLRuntime OpenCLRT(CGM);
4831 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
4832 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
4833
4834 // Type of the generic packet parameter.
4835 unsigned GenericAS =
4836 getContext().getTargetAddressSpace(LangAS::opencl_generic);
4837 llvm::Type *I8PTy = llvm::PointerType::get(
4838 llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
4839
4840 // Determine which overloaded version we should generate the call for.
4841 if (2U == E->getNumArgs()) {
4842 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
4843 : "__write_pipe_2";
4844 // Create a generic function type so the call works with any builtin or
4845 // user-defined type.
4846 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
4847 llvm::FunctionType *FTy = llvm::FunctionType::get(
4848 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
4849 Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
4850 return RValue::get(
4851 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
4852 {Arg0, BCast, PacketSize, PacketAlign}));
4853 } else {
4854 assert(4 == E->getNumArgs() &&
4855 "Illegal number of parameters to pipe function");
4856 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
4857 : "__write_pipe_4";
4858
4859 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
4860 Int32Ty, Int32Ty};
4861 Value *Arg2 = EmitScalarExpr(E->getArg(2)),
4862 *Arg3 = EmitScalarExpr(E->getArg(3));
4863 llvm::FunctionType *FTy = llvm::FunctionType::get(
4864 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
4865 Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
4866 // We know the third argument is an integer type, but we may need to cast
4867 // it to i32.
4868 if (Arg2->getType() != Int32Ty)
4869 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
4870 return RValue::get(
4871 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
4872 {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
4873 }
4874 }
4875 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
4876 // functions
4877 case Builtin::BIreserve_read_pipe:
4878 case Builtin::BIreserve_write_pipe:
4879 case Builtin::BIwork_group_reserve_read_pipe:
4880 case Builtin::BIwork_group_reserve_write_pipe:
4881 case Builtin::BIsub_group_reserve_read_pipe:
4882 case Builtin::BIsub_group_reserve_write_pipe: {
4883 // Composing the mangled name for the function.
4884 const char *Name;
4885 if (BuiltinID == Builtin::BIreserve_read_pipe)
4886 Name = "__reserve_read_pipe";
4887 else if (BuiltinID == Builtin::BIreserve_write_pipe)
4888 Name = "__reserve_write_pipe";
4889 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
4890 Name = "__work_group_reserve_read_pipe";
4891 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
4892 Name = "__work_group_reserve_write_pipe";
4893 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
4894 Name = "__sub_group_reserve_read_pipe";
4895 else
4896 Name = "__sub_group_reserve_write_pipe";
4897
4898 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
4899 *Arg1 = EmitScalarExpr(E->getArg(1));
4900 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
4901 CGOpenCLRuntime OpenCLRT(CGM);
4902 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
4903 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
4904
4905 // Building the generic function prototype.
4906 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
4907 llvm::FunctionType *FTy = llvm::FunctionType::get(
4908 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
4909 // We know the second argument is an integer type, but we may need to cast
4910 // it to i32.
4911 if (Arg1->getType() != Int32Ty)
4912 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
4913 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
4914 {Arg0, Arg1, PacketSize, PacketAlign}));
4915 }
4916 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
4917 // functions
4918 case Builtin::BIcommit_read_pipe:
4919 case Builtin::BIcommit_write_pipe:
4920 case Builtin::BIwork_group_commit_read_pipe:
4921 case Builtin::BIwork_group_commit_write_pipe:
4922 case Builtin::BIsub_group_commit_read_pipe:
4923 case Builtin::BIsub_group_commit_write_pipe: {
4924 const char *Name;
4925 if (BuiltinID == Builtin::BIcommit_read_pipe)
4926 Name = "__commit_read_pipe";
4927 else if (BuiltinID == Builtin::BIcommit_write_pipe)
4928 Name = "__commit_write_pipe";
4929 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
4930 Name = "__work_group_commit_read_pipe";
4931 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
4932 Name = "__work_group_commit_write_pipe";
4933 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
4934 Name = "__sub_group_commit_read_pipe";
4935 else
4936 Name = "__sub_group_commit_write_pipe";
4937
4938 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
4939 *Arg1 = EmitScalarExpr(E->getArg(1));
4940 CGOpenCLRuntime OpenCLRT(CGM);
4941 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
4942 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
4943
4944 // Building the generic function prototype.
4945 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
4946 llvm::FunctionType *FTy =
4947 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
4948 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
4949
4950 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
4951 {Arg0, Arg1, PacketSize, PacketAlign}));
4952 }
4953 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
4954 case Builtin::BIget_pipe_num_packets:
4955 case Builtin::BIget_pipe_max_packets: {
4956 const char *BaseName;
4957 const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();
4958 if (BuiltinID == Builtin::BIget_pipe_num_packets)
4959 BaseName = "__get_pipe_num_packets";
4960 else
4961 BaseName = "__get_pipe_max_packets";
4962 std::string Name = std::string(BaseName) +
4963 std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
4964
4965 // Building the generic function prototype.
4966 Value *Arg0 = EmitScalarExpr(E->getArg(0));
4967 CGOpenCLRuntime OpenCLRT(CGM);
4968 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
4969 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
4970 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
4971 llvm::FunctionType *FTy = llvm::FunctionType::get(
4972 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
4973
4974 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
4975 {Arg0, PacketSize, PacketAlign}));
4976 }
4977
4978 // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
4979 case Builtin::BIto_global:
4980 case Builtin::BIto_local:
4981 case Builtin::BIto_private: {
4982 auto Arg0 = EmitScalarExpr(E->getArg(0));
4983 auto NewArgT = llvm::PointerType::get(Int8Ty,
4984 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
4985 auto NewRetT = llvm::PointerType::get(Int8Ty,
4986 CGM.getContext().getTargetAddressSpace(
4987 E->getType()->getPointeeType().getAddressSpace()));
4988 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
4989 llvm::Value *NewArg;
4990 if (Arg0->getType()->getPointerAddressSpace() !=
4991 NewArgT->getPointerAddressSpace())
4992 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
4993 else
4994 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
4995 auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
4996 auto NewCall =
4997 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
4998 return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
4999 ConvertType(E->getType())));
5000 }
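// Illustrative usage (a sketch, OpenCL C): to_global returns a global
// pointer when the generic pointer actually addresses global memory and
// NULL otherwise; the lowering above forwards to __to_global et al.:
//
//   kernel void k(global int *g) {
//     int *p = g;                    // generic pointer
//     global int *q = to_global(p);  // non-NULL here
//   }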
5001
5002 // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
5003 // It contains four different overload formats specified in Table 6.13.17.1.
5004 case Builtin::BIenqueue_kernel: {
5005 StringRef Name; // Generated function call name
5006 unsigned NumArgs = E->getNumArgs();
5007
5008 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
5009 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
5010 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5011
5012 llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
5013 llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
5014 LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
5015 llvm::Value *Range = NDRangeL.getAddress(*this).getPointer();
5016 llvm::Type *RangeTy = NDRangeL.getAddress(*this).getType();
5017
5018 if (NumArgs == 4) {
5019 // The most basic form of the call with parameters:
5020 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
5021 Name = "__enqueue_kernel_basic";
5022 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
5023 GenericVoidPtrTy};
5024 llvm::FunctionType *FTy = llvm::FunctionType::get(
5025 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5026
5027 auto Info =
5028 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
5029 llvm::Value *Kernel =
5030 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5031 llvm::Value *Block =
5032 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5033
5034 AttrBuilder B(Builder.getContext());
5035 B.addByValAttr(NDRangeL.getAddress(*this).getElementType());
5036 llvm::AttributeList ByValAttrSet =
5037 llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
5038
5039 auto RTCall =
5040 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
5041 {Queue, Flags, Range, Kernel, Block});
5042 RTCall->setAttributes(ByValAttrSet);
5043 return RValue::get(RTCall);
5044 }
5045 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
5046
5047 // Create a temporary array to hold the sizes of local pointer arguments
5048 // for the block. \p First is the position of the first size argument.
5049 auto CreateArrayForSizeVar = [=](unsigned First)
5050 -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
5051 llvm::APInt ArraySize(32, NumArgs - First);
5052 QualType SizeArrayTy = getContext().getConstantArrayType(
5053 getContext().getSizeType(), ArraySize, nullptr, ArrayType::Normal,
5054 /*IndexTypeQuals=*/0);
5055 auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
5056 llvm::Value *TmpPtr = Tmp.getPointer();
5057 llvm::Value *TmpSize = EmitLifetimeStart(
5058 CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
5059 llvm::Value *ElemPtr;
5060 // Each of the following arguments specifies the size of the corresponding
5061 // argument passed to the enqueued block.
5062 auto *Zero = llvm::ConstantInt::get(IntTy, 0);
5063 for (unsigned I = First; I < NumArgs; ++I) {
5064 auto *Index = llvm::ConstantInt::get(IntTy, I - First);
5065 auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr,
5066 {Zero, Index});
5067 if (I == First)
5068 ElemPtr = GEP;
5069 auto *V =
5070 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
5071 Builder.CreateAlignedStore(
5072 V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
5073 }
5074 return std::tie(ElemPtr, TmpSize, TmpPtr);
5075 };
5076
5077 // Could have events and/or varargs.
5078 if (E->getArg(3)->getType()->isBlockPointerType()) {
5079 // No events passed, but has variadic arguments.
5080 Name = "__enqueue_kernel_varargs";
5081 auto Info =
5082 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
5083 llvm::Value *Kernel =
5084 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5085 auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5086 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
5087 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
5088
5089 // Create a vector of the arguments, as well as a constant value to
5090 // express to the runtime the number of variadic arguments.
5091 llvm::Value *const Args[] = {Queue, Flags,
5092 Range, Kernel,
5093 Block, ConstantInt::get(IntTy, NumArgs - 4),
5094 ElemPtr};
5095 llvm::Type *const ArgTys[] = {
5096 QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
5097 GenericVoidPtrTy, IntTy, ElemPtr->getType()};
5098
5099 llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
5100 auto Call = RValue::get(
5101 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
5102 if (TmpSize)
5103 EmitLifetimeEnd(TmpSize, TmpPtr);
5104 return Call;
5105 }
5106 // All remaining overloads take event arguments.
5107 if (NumArgs >= 7) {
5108 llvm::PointerType *PtrTy = llvm::PointerType::get(
5109 CGM.getLLVMContext(),
5110 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
5111
5112 llvm::Value *NumEvents =
5113 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
5114
5115 // Since SemaOpenCLBuiltinEnqueueKernel allows the fifth and sixth arguments
5116 // to be null pointer constants (including a literal `0`), we can detect
5117 // that case and emit a null pointer directly.
5118 llvm::Value *EventWaitList = nullptr;
5119 if (E->getArg(4)->isNullPointerConstant(
5120 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
5121 EventWaitList = llvm::ConstantPointerNull::get(PtrTy);
5122 } else {
5123 EventWaitList = E->getArg(4)->getType()->isArrayType()
5124 ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
5125 : EmitScalarExpr(E->getArg(4));
5126 // Convert to generic address space.
5127 EventWaitList = Builder.CreatePointerCast(EventWaitList, PtrTy);
5128 }
5129 llvm::Value *EventRet = nullptr;
5130 if (E->getArg(5)->isNullPointerConstant(
5131 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
5132 EventRet = llvm::ConstantPointerNull::get(PtrTy);
5133 } else {
5134 EventRet =
5135 Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), PtrTy);
5136 }
5137
5138 auto Info =
5139 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
5140 llvm::Value *Kernel =
5141 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5142 llvm::Value *Block =
5143 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5144
5145 std::vector<llvm::Type *> ArgTys = {
5146 QueueTy, Int32Ty, RangeTy, Int32Ty,
5147 PtrTy, PtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
5148
5149 std::vector<llvm::Value *> Args = {Queue, Flags, Range,
5150 NumEvents, EventWaitList, EventRet,
5151 Kernel, Block};
5152
5153 if (NumArgs == 7) {
5154 // Has events but no variadics.
5155 Name = "__enqueue_kernel_basic_events";
5156 llvm::FunctionType *FTy = llvm::FunctionType::get(
5157 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5158 return RValue::get(
5159 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5160 llvm::ArrayRef<llvm::Value *>(Args)));
5161 }
5162 // Has event info and variadics
5163 // Pass the number of variadics to the runtime function too.
5164 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
5165 ArgTys.push_back(Int32Ty);
5166 Name = "__enqueue_kernel_events_varargs";
5167
5168 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
5169 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
5170 Args.push_back(ElemPtr);
5171 ArgTys.push_back(ElemPtr->getType());
5172
5173 llvm::FunctionType *FTy = llvm::FunctionType::get(
5174 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5175 auto Call =
5176 RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5177 llvm::ArrayRef<llvm::Value *>(Args)));
5178 if (TmpSize)
5179 EmitLifetimeEnd(TmpSize, TmpPtr);
5180 return Call;
5181 }
5182 [[fallthrough]];
5183 }
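// Illustrative usage (a sketch, OpenCL C): the overload families handled
// above correspond to source-level calls such as:
//
//   enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL,
//                  ndrange, ^{ /* block */ });           // basic form
//   enqueue_kernel(q, flags, ndrange, nevents, waitlist, &evt,
//                  ^(local int *p) { /* block */ }, sz); // events + varargs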
5184 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
5185 // parameter.
5186 case Builtin::BIget_kernel_work_group_size: {
5187 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
5188 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5189 auto Info =
5190 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
5191 Value *Kernel =
5192 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5193 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5194 return RValue::get(EmitRuntimeCall(
5195 CGM.CreateRuntimeFunction(
5196 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
5197 false),
5198 "__get_kernel_work_group_size_impl"),
5199 {Kernel, Arg}));
5200 }
5201 case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
5202 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
5203 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5204 auto Info =
5205 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
5206 Value *Kernel =
5207 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5208 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5209 return RValue::get(EmitRuntimeCall(
5210 CGM.CreateRuntimeFunction(
5211 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
5212 false),
5213 "__get_kernel_preferred_work_group_size_multiple_impl"),
5214 {Kernel, Arg}));
5215 }
5216 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
5217 case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
5218 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
5219 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5220 LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
5221 llvm::Value *NDRange = NDRangeL.getAddress(*this).getPointer();
5222 auto Info =
5223 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
5224 Value *Kernel =
5225 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5226 Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5227 const char *Name =
5228 BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
5229 ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
5230 : "__get_kernel_sub_group_count_for_ndrange_impl";
5233 llvm::FunctionType::get(
5234 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
5235 false),
5236 Name),
5237 {NDRange, Kernel, Block}));
5238 }
5239
5240 case Builtin::BI__builtin_store_half:
5241 case Builtin::BI__builtin_store_halff: {
5242 Value *Val = EmitScalarExpr(E->getArg(0));
5243 Address Address = EmitPointerWithAlignment(E->getArg(1));
5244 Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
5245 Builder.CreateStore(HalfVal, Address);
5246 return RValue::get(nullptr);
5247 }
5248 case Builtin::BI__builtin_load_half: {
5249 Address Address = EmitPointerWithAlignment(E->getArg(0));
5250 Value *HalfVal = Builder.CreateLoad(Address);
5251 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
5252 }
5253 case Builtin::BI__builtin_load_halff: {
5254 Address Address = EmitPointerWithAlignment(E->getArg(0));
5255 Value *HalfVal = Builder.CreateLoad(Address);
5256 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
5257 }
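// Illustrative usage (a sketch): these builtins convert through IEEE half
// precision in memory while keeping float/double arithmetic in registers:
//
//   __builtin_store_halff(1.5f, p);     // fptrunc float -> half, then store
//   float f = __builtin_load_halff(p);  // load half, fpext to float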
5258 case Builtin::BIprintf:
5259 if (getTarget().getTriple().isNVPTX() ||
5260 getTarget().getTriple().isAMDGCN()) {
5261 if (getLangOpts().OpenMPIsTargetDevice)
5262 return EmitOpenMPDevicePrintfCallExpr(E);
5263 if (getTarget().getTriple().isNVPTX())
5264 return EmitNVPTXDevicePrintfCallExpr(E);
5265 if (getTarget().getTriple().isAMDGCN() && getLangOpts().HIP)
5266 return EmitAMDGPUDevicePrintfCallExpr(E);
5267 }
5268
5269 break;
5270 case Builtin::BI__builtin_canonicalize:
5271 case Builtin::BI__builtin_canonicalizef:
5272 case Builtin::BI__builtin_canonicalizef16:
5273 case Builtin::BI__builtin_canonicalizel:
5274 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
5275
5276 case Builtin::BI__builtin_thread_pointer: {
5277 if (!getContext().getTargetInfo().isTLSSupported())
5278 CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
5279 // Fall through - it's already mapped to the intrinsic by ClangBuiltin.
5280 break;
5281 }
5282 case Builtin::BI__builtin_os_log_format:
5283 return emitBuiltinOSLogFormat(*E);
5284
5285 case Builtin::BI__xray_customevent: {
5286 if (!ShouldXRayInstrumentFunction())
5287 return RValue::getIgnored();
5288
5289 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
5290 XRayInstrKind::Custom))
5291 return RValue::getIgnored();
5292
5293 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
5294 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
5295 return RValue::getIgnored();
5296
5297 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
5298 auto FTy = F->getFunctionType();
5299 auto Arg0 = E->getArg(0);
5300 auto Arg0Val = EmitScalarExpr(Arg0);
5301 auto Arg0Ty = Arg0->getType();
5302 auto PTy0 = FTy->getParamType(0);
5303 if (PTy0 != Arg0Val->getType()) {
5304 if (Arg0Ty->isArrayType())
5305 Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
5306 else
5307 Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
5308 }
5309 auto Arg1 = EmitScalarExpr(E->getArg(1));
5310 auto PTy1 = FTy->getParamType(1);
5311 if (PTy1 != Arg1->getType())
5312 Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
5313 return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
5314 }
5315
5316 case Builtin::BI__xray_typedevent: {
5317 // TODO: There should be a way to always emit events even if the current
5318 // function is not instrumented. Losing events in a stream can cripple
5319 // a trace.
5320 if (!ShouldXRayInstrumentFunction())
5321 return RValue::getIgnored();
5322
5323 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
5324 XRayInstrKind::Typed))
5325 return RValue::getIgnored();
5326
5327 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
5328 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
5329 return RValue::getIgnored();
5330
5331 Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
5332 auto FTy = F->getFunctionType();
5333 auto Arg0 = EmitScalarExpr(E->getArg(0));
5334 auto PTy0 = FTy->getParamType(0);
5335 if (PTy0 != Arg0->getType())
5336 Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
5337 auto Arg1 = E->getArg(1);
5338 auto Arg1Val = EmitScalarExpr(Arg1);
5339 auto Arg1Ty = Arg1->getType();
5340 auto PTy1 = FTy->getParamType(1);
5341 if (PTy1 != Arg1Val->getType()) {
5342 if (Arg1Ty->isArrayType())
5343 Arg1Val = EmitArrayToPointerDecay(Arg1).getPointer();
5344 else
5345 Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
5346 }
5347 auto Arg2 = EmitScalarExpr(E->getArg(2));
5348 auto PTy2 = FTy->getParamType(2);
5349 if (PTy2 != Arg2->getType())
5350 Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
5351 return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
5352 }
5353
5354 case Builtin::BI__builtin_ms_va_start:
5355 case Builtin::BI__builtin_ms_va_end:
5356 return RValue::get(
5357 EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
5358 BuiltinID == Builtin::BI__builtin_ms_va_start));
5359
5360 case Builtin::BI__builtin_ms_va_copy: {
5361 // Lower this manually. We can't reliably determine whether or not any
5362 // given va_copy() is for a Win64 va_list from the calling convention
5363 // alone, because it's legal to do this from a System V ABI function.
5364 // With opaque pointer types, we won't have enough information in LLVM
5365 // IR to determine this from the argument types, either. Best to do it
5366 // now, while we have enough information.
5367 Address DestAddr = EmitMSVAListRef(E->getArg(0));
5368 Address SrcAddr = EmitMSVAListRef(E->getArg(1));
5369
5370 llvm::Type *BPP = Int8PtrPtrTy;
5371
5372 DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
5373 Int8PtrTy, DestAddr.getAlignment());
5374 SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
5375 Int8PtrTy, SrcAddr.getAlignment());
5376
5377 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
5378 return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
5379 }
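// Illustrative usage (a sketch): __builtin_ms_va_copy pairs with the
// __builtin_ms_va_start/_end builtins for Win64-style va_lists, which is
// why the copy is lowered here instead of being inferred from types:
//
//   __attribute__((ms_abi)) void f(int n, ...) {
//     __builtin_ms_va_list ap, aq;
//     __builtin_ms_va_start(ap, n);
//     __builtin_ms_va_copy(aq, ap);  // becomes the load/store pair above
//     __builtin_ms_va_end(aq);
//     __builtin_ms_va_end(ap);
//   }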
5380
5381 case Builtin::BI__builtin_get_device_side_mangled_name: {
5382 auto Name = CGM.getCUDARuntime().getDeviceSideName(
5383 cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
5384 auto Str = CGM.GetAddrOfConstantCString(Name, "");
5385 llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0),
5386 llvm::ConstantInt::get(SizeTy, 0)};
5387 auto *Ptr = llvm::ConstantExpr::getGetElementPtr(Str.getElementType(),
5388 Str.getPointer(), Zeros);
5389 return RValue::get(Ptr);
5390 }
5391 }
5392
5393 // If this is an alias for a lib function (e.g. __builtin_sin), emit
5394 // the call using the normal call path, but using the unmangled
5395 // version of the function name.
5396 if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
5397 return emitLibraryCall(*this, FD, E,
5398 CGM.getBuiltinLibFunction(FD, BuiltinID));
5399
5400 // If this is a predefined lib function (e.g. malloc), emit the call
5401 // using exactly the normal call path.
5402 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
5403 return emitLibraryCall(*this, FD, E,
5404 cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
5405
5406 // Check that a call to a target specific builtin has the correct target
5407 // features.
5408 // This check is down here so that non-target-specific builtins avoid it;
5409 // if generic builtins start to require generic target features, we can
5410 // move it up to the beginning of the function.
5411 checkTargetFeatures(E, FD);
5412
5413 if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
5414 LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
5415
5416 // See if we have a target specific intrinsic.
5417 StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);
5418 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
5419 StringRef Prefix =
5420 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
5421 if (!Prefix.empty()) {
5422 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name);
5423 // NOTE: we don't need to perform a compatibility flag check here since the
5424 // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters
5425 // the MS builtins via ALL_MS_LANGUAGES, so they are filtered out earlier.
5426 if (IntrinsicID == Intrinsic::not_intrinsic)
5427 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
5428 }
5429
5430 if (IntrinsicID != Intrinsic::not_intrinsic) {
5431 SmallVector<Value*, 16> Args;
5432
5433 // Find out if any arguments are required to be integer constant
5434 // expressions.
5435 unsigned ICEArguments = 0;
5436 ASTContext::GetBuiltinTypeError Error;
5437 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5438 assert(Error == ASTContext::GE_None && "Should not codegen an error");
5439
5440 Function *F = CGM.getIntrinsic(IntrinsicID);
5441 llvm::FunctionType *FTy = F->getFunctionType();
5442
5443 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
5444 Value *ArgValue;
5445 // If this is a normal argument, just emit it as a scalar.
5446 if ((ICEArguments & (1 << i)) == 0) {
5447 ArgValue = EmitScalarExpr(E->getArg(i));
5448 } else {
5449 // If this is required to be a constant, constant fold it so that we
5450 // know that the generated intrinsic gets a ConstantInt.
5451 ArgValue = llvm::ConstantInt::get(
5452 getLLVMContext(),
5453 *E->getArg(i)->getIntegerConstantExpr(getContext()));
5454 }
5455
5456 // If the intrinsic arg type is different from the builtin arg type
5457 // we need to do a bit cast.
5458 llvm::Type *PTy = FTy->getParamType(i);
5459 if (PTy != ArgValue->getType()) {
5460 // XXX - vector of pointers?
5461 if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
5462 if (PtrTy->getAddressSpace() !=
5463 ArgValue->getType()->getPointerAddressSpace()) {
5464 ArgValue = Builder.CreateAddrSpaceCast(
5465 ArgValue, llvm::PointerType::get(getLLVMContext(),
5466 PtrTy->getAddressSpace()));
5467 }
5468 }
5469
5470 assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
5471 "Must be able to losslessly bit cast to param");
5472 // Cast vector type (e.g., v256i32) to x86_amx; this only happens
5473 // in AMX intrinsics.
5474 if (PTy->isX86_AMXTy())
5475 ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
5476 {ArgValue->getType()}, {ArgValue});
5477 else
5478 ArgValue = Builder.CreateBitCast(ArgValue, PTy);
5479 }
5480
5481 Args.push_back(ArgValue);
5482 }
5483
5484 Value *V = Builder.CreateCall(F, Args);
5485 QualType BuiltinRetType = E->getType();
5486
5487 llvm::Type *RetTy = VoidTy;
5488 if (!BuiltinRetType->isVoidType())
5489 RetTy = ConvertType(BuiltinRetType);
5490
5491 if (RetTy != V->getType()) {
5492 // XXX - vector of pointers?
5493 if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
5494 if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
5495 V = Builder.CreateAddrSpaceCast(
5496 V, llvm::PointerType::get(getLLVMContext(),
5497 PtrTy->getAddressSpace()));
5498 }
5499 }
5500
5501 assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
5502 "Must be able to losslessly bit cast result type");
5503 // Cast x86_amx to vector type (e.g., v256i32); this only happens
5504 // in AMX intrinsics.
5505 if (V->getType()->isX86_AMXTy())
5506 V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy},
5507 {V});
5508 else
5509 V = Builder.CreateBitCast(V, RetTy);
5510 }
5511
5512 if (RetTy->isVoidTy())
5513 return RValue::get(nullptr);
5514
5515 return RValue::get(V);
5516 }
5517
5518 // Some target-specific builtins can have aggregate return values, e.g.
5519 // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
5520 // ReturnValue to be non-null, so that the target-specific emission code can
5521 // always just emit into it.
5522 TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());
5523 if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
5524 Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
5525 ReturnValue = ReturnValueSlot(DestPtr, false);
5526 }
5527
5528 // Now see if we can emit a target-specific builtin.
5529 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
5530 switch (EvalKind) {
5531 case TEK_Scalar:
5532 if (V->getType()->isVoidTy())
5533 return RValue::get(nullptr);
5534 return RValue::get(V);
5535 case TEK_Aggregate:
5536 return RValue::getAggregate(ReturnValue.getValue(),
5537 ReturnValue.isVolatile());
5538 case TEK_Complex:
5539 llvm_unreachable("No current target builtin returns complex");
5540 }
5541 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
5542 }
5543
5544 ErrorUnsupported(E, "builtin function");
5545
5546 // Unknown builtin, for now just dump it out and return undef.
5547 return GetUndefRValue(E->getType());
5548}
5549
5550 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
5551 unsigned BuiltinID, const CallExpr *E,
5552 ReturnValueSlot ReturnValue,
5553 llvm::Triple::ArchType Arch) {
5554 switch (Arch) {
5555 case llvm::Triple::arm:
5556 case llvm::Triple::armeb:
5557 case llvm::Triple::thumb:
5558 case llvm::Triple::thumbeb:
5559 return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
5560 case llvm::Triple::aarch64:
5561 case llvm::Triple::aarch64_32:
5562 case llvm::Triple::aarch64_be:
5563 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
5564 case llvm::Triple::bpfeb:
5565 case llvm::Triple::bpfel:
5566 return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
5567 case llvm::Triple::x86:
5568 case llvm::Triple::x86_64:
5569 return CGF->EmitX86BuiltinExpr(BuiltinID, E);
5570 case llvm::Triple::ppc:
5571 case llvm::Triple::ppcle:
5572 case llvm::Triple::ppc64:
5573 case llvm::Triple::ppc64le:
5574 return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
5575 case llvm::Triple::r600:
5576 case llvm::Triple::amdgcn:
5577 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
5578 case llvm::Triple::systemz:
5579 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
5580 case llvm::Triple::nvptx:
5581 case llvm::Triple::nvptx64:
5582 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
5583 case llvm::Triple::wasm32:
5584 case llvm::Triple::wasm64:
5585 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
5586 case llvm::Triple::hexagon:
5587 return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
5588 case llvm::Triple::riscv32:
5589 case llvm::Triple::riscv64:
5590 return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
5591 case llvm::Triple::loongarch32:
5592 case llvm::Triple::loongarch64:
5593 return CGF->EmitLoongArchBuiltinExpr(BuiltinID, E);
5594 default:
5595 return nullptr;
5596 }
5597}
5598
5599 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
5600 const CallExpr *E,
5601 ReturnValueSlot ReturnValue) {
5602 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
5603 assert(getContext().getAuxTargetInfo() && "Missing aux target info");
5604 return EmitTargetArchBuiltinExpr(
5605 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
5606 ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
5607 }
5608
5609 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
5610 getTarget().getTriple().getArch());
5611}
5612
5613static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
5614 NeonTypeFlags TypeFlags,
5615 bool HasLegalHalfType = true,
5616 bool V1Ty = false,
5617 bool AllowBFloatArgsAndRet = true) {
5618 int IsQuad = TypeFlags.isQuad();
5619 switch (TypeFlags.getEltType()) {
5620 case NeonTypeFlags::Int8:
5621 case NeonTypeFlags::Poly8:
5622 return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
5623 case NeonTypeFlags::Int16:
5624 case NeonTypeFlags::Poly16:
5625 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
5626 case NeonTypeFlags::BFloat16:
5627 if (AllowBFloatArgsAndRet)
5628 return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
5629 else
5630 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
5631 case NeonTypeFlags::Float16:
5632 if (HasLegalHalfType)
5633 return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
5634 else
5635 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
5636 case NeonTypeFlags::Int32:
5637 return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
5638 case NeonTypeFlags::Int64:
5639 case NeonTypeFlags::Poly64:
5640 return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
5641 case NeonTypeFlags::Poly128:
5642 // FIXME: i128 and f128 are not fully supported in Clang and LLVM;
5643 // much of the i128 and f128 API is missing, so we use v16i8 to
5644 // represent poly128 and rely on pattern matching.
5645 return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
5646 case NeonTypeFlags::Float32:
5647 return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
5648 case NeonTypeFlags::Float64:
5649 return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
5650 }
5651 llvm_unreachable("Unknown vector element type!");
5652}
5653
5654static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
5655 NeonTypeFlags IntTypeFlags) {
5656 int IsQuad = IntTypeFlags.isQuad();
5657 switch (IntTypeFlags.getEltType()) {
5658 case NeonTypeFlags::Int16:
5659 return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
5660 case NeonTypeFlags::Int32:
5661 return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
5662 case NeonTypeFlags::Int64:
5663 return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
5664 default:
5665 llvm_unreachable("Type can't be converted to floating-point!");
5666 }
5667}
5668
5669 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
5670 const ElementCount &Count) {
5671 Value *SV = llvm::ConstantVector::getSplat(Count, C);
5672 return Builder.CreateShuffleVector(V, V, SV, "lane");
5673}
5674
5675 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
5676 ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
5677 return EmitNeonSplat(V, C, EC);
5678}
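// Illustrative example (a sketch): splatting lane 1 of a <4 x i32> vector
// materializes a constant shuffle mask of all-1 indices:
//
//   %lane = shufflevector <4 x i32> %v, <4 x i32> %v,
//                         <4 x i32> <i32 1, i32 1, i32 1, i32 1>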
5679
5680 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
5681 const char *name,
5682 unsigned shift, bool rightshift) {
5683 unsigned j = 0;
5684 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
5685 ai != ae; ++ai, ++j) {
5686 if (F->isConstrainedFPIntrinsic())
5687 if (ai->getType()->isMetadataTy())
5688 continue;
5689 if (shift > 0 && shift == j)
5690 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
5691 else
5692 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
5693 }
5694
5695 if (F->isConstrainedFPIntrinsic())
5696 return Builder.CreateConstrainedFPCall(F, Ops, name);
5697 else
5698 return Builder.CreateCall(F, Ops, name);
5699}
5700
5701 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
5702 bool neg) {
5703 int SV = cast<ConstantInt>(V)->getSExtValue();
5704 return ConstantInt::get(Ty, neg ? -SV : SV);
5705}
5706
5707// Right-shift a vector by a constant.
5708 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
5709 llvm::Type *Ty, bool usgn,
5710 const char *name) {
5711 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
5712
5713 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
5714 int EltSize = VTy->getScalarSizeInBits();
5715
5716 Vec = Builder.CreateBitCast(Vec, Ty);
5717
5718 // lshr/ashr are undefined when the shift amount is equal to the vector
5719 // element size.
5720 if (ShiftAmt == EltSize) {
5721 if (usgn) {
5722 // Right-shifting an unsigned value by its size yields 0.
5723 return llvm::ConstantAggregateZero::get(VTy);
5724 } else {
5725 // Right-shifting a signed value by its size is equivalent
5726 // to a shift of size-1.
5727 --ShiftAmt;
5728 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
5729 }
5730 }
5731
5732 Shift = EmitNeonShiftVector(Shift, Ty, false);
5733 if (usgn)
5734 return Builder.CreateLShr(Vec, Shift, name);
5735 else
5736 return Builder.CreateAShr(Vec, Shift, name);
5737}
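// Illustrative example (a sketch): for vshrq_n_s32(v, 32) the shift amount
// equals the element size, so the code above emits an arithmetic shift by
// 31 instead (each lane becomes 0 or -1):
//
//   %r = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
//
// The unsigned form vshrq_n_u32(v, 32) folds directly to zero.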
5738
5739enum {
5740 AddRetType = (1 << 0),
5741 Add1ArgType = (1 << 1),
5742 Add2ArgTypes = (1 << 2),
5743
5744 VectorizeRetType = (1 << 3),
5745 VectorizeArgTypes = (1 << 4),
5746
5747 InventFloatType = (1 << 5),
5748 UnsignedAlts = (1 << 6),
5749
5750 Use64BitVectors = (1 << 7),
5751 Use128BitVectors = (1 << 8),
5752
5753 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
5754 VectorRet = AddRetType | VectorizeRetType,
5755 VectorRetGetArgs01 =
5756 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
5757 FpCmpzModifiers =
5758 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
5759 };
5760
5761namespace {
5762struct ARMVectorIntrinsicInfo {
5763 const char *NameHint;
5764 unsigned BuiltinID;
5765 unsigned LLVMIntrinsic;
5766 unsigned AltLLVMIntrinsic;
5767 uint64_t TypeModifier;
5768
5769 bool operator<(unsigned RHSBuiltinID) const {
5770 return BuiltinID < RHSBuiltinID;
5771 }
5772 bool operator<(const ARMVectorIntrinsicInfo &TE) const {
5773 return BuiltinID < TE.BuiltinID;
5774 }
5775};
5776} // end anonymous namespace
5777
5778#define NEONMAP0(NameBase) \
5779 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
5780
5781#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
5782 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
5783 Intrinsic::LLVMIntrinsic, 0, TypeModifier }
5784
5785#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
5786 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
5787 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
5788 TypeModifier }
5789
5790static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
5791 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
5792 NEONMAP0(splat_lane_v),
5793 NEONMAP0(splat_laneq_v),
5794 NEONMAP0(splatq_lane_v),
5795 NEONMAP0(splatq_laneq_v),
5796 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
5797 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
5798 NEONMAP1(vabs_v, arm_neon_vabs, 0),
5799 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
5800 NEONMAP0(vadd_v),
5801 NEONMAP0(vaddhn_v),
5802 NEONMAP0(vaddq_v),
5803 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
5804 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
5805 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
5806 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
5807 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
5808 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
5809 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
5810 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
5811 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
5812 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
5813 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
5814 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
5815 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
5816 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
5817 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
5818 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
5819 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
5820 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
5821 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
5822 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
5823 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
5824 NEONMAP1(vcage_v, arm_neon_vacge, 0),
5825 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
5826 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
5827 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
5828 NEONMAP1(vcale_v, arm_neon_vacge, 0),
5829 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
5830 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
5831 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
5832 NEONMAP0(vceqz_v),
5833 NEONMAP0(vceqzq_v),
5834 NEONMAP0(vcgez_v),
5835 NEONMAP0(vcgezq_v),
5836 NEONMAP0(vcgtz_v),
5837 NEONMAP0(vcgtzq_v),
5838 NEONMAP0(vclez_v),
5839 NEONMAP0(vclezq_v),
5840 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
5841 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
5842 NEONMAP0(vcltz_v),
5843 NEONMAP0(vcltzq_v),
5844 NEONMAP1(vclz_v, ctlz, Add1ArgType),
5845 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
5846 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
5847 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
5848 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
5849 NEONMAP0(vcvt_f16_s16),
5850 NEONMAP0(vcvt_f16_u16),
5851 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
5852 NEONMAP0(vcvt_f32_v),
5853 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
5854 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
5855 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
5856 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
5857 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
5858 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
5859 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
5860 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
5861 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
5862 NEONMAP0(vcvt_s16_f16),
5863 NEONMAP0(vcvt_s32_v),
5864 NEONMAP0(vcvt_s64_v),
5865 NEONMAP0(vcvt_u16_f16),
5866 NEONMAP0(vcvt_u32_v),
5867 NEONMAP0(vcvt_u64_v),
5868 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
5869 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
5870 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
5871 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
5872 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
5873 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
5874 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
5875 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
5876 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
5877 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
5878 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
5879 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
5880 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
5881 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
5882 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
5883 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
5884 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
5885 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
5886 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
5887 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
5888 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
5889 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
5890 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
5891 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
5892 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
5893 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
5894 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
5895 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
5896 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
5897 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
5898 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
5899 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
5900 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
5901 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
5902 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
5903 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
5904 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
5905 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
5906 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
5907 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
5908 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
5909 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
5910 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
5911 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
5912 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
5913 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
5914 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
5915 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
5916 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
5917 NEONMAP0(vcvtq_f16_s16),
5918 NEONMAP0(vcvtq_f16_u16),
5919 NEONMAP0(vcvtq_f32_v),
5920 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
5921 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
5922 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
5923 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
5924 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
5925 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
5926 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
5927 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
5928 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
5929 NEONMAP0(vcvtq_s16_f16),
5930 NEONMAP0(vcvtq_s32_v),
5931 NEONMAP0(vcvtq_s64_v),
5932 NEONMAP0(vcvtq_u16_f16),
5933 NEONMAP0(vcvtq_u32_v),
5934 NEONMAP0(vcvtq_u64_v),
5935 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
5936 NEONMAP1(vdot_u32, arm_neon_udot, 0),
5937 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
5938 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
5939 NEONMAP0(vext_v),
5940 NEONMAP0(vextq_v),
5941 NEONMAP0(vfma_v),
5942 NEONMAP0(vfmaq_v),
5943 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
5944 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
5945 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
5946 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
5947 NEONMAP0(vld1_dup_v),
5948 NEONMAP1(vld1_v, arm_neon_vld1, 0),
5949 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
5950 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
5951 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
5952 NEONMAP0(vld1q_dup_v),
5953 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
5954 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
5955 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
5956 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
5957 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
5958 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
5959 NEONMAP1(vld2_v, arm_neon_vld2, 0),
5960 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
5961 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
5962 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
5963 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
5964 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
5965 NEONMAP1(vld3_v, arm_neon_vld3, 0),
5966 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
5967 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
5968 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
5969 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
5970 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
5971 NEONMAP1(vld4_v, arm_neon_vld4, 0),
5972 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
5973 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
5974 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
5975 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
5976 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
5977 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
5978 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
5979 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
5980 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
5981 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
5982 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
5983 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
5984 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
5985 NEONMAP0(vmovl_v),
5986 NEONMAP0(vmovn_v),
5987 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
5988 NEONMAP0(vmull_v),
5989 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
5990 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
5991 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
5992 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
5993 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
5994 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
5995 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
5996 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
5997 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
5998 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
5999 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
6000 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
6001 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
6002 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
6003 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
6004 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
6005 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
6006 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
6007 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
6008 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
6009 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
6010 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
6011 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
6012 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
6013 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
6014 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
6015 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
6016 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
6017 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
6018 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
6019 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
6020 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
6021 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
6022 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
6023 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
6024 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
6025 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
6026 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
6027 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
6028 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
6029 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
6030 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
6031 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
6032 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
6033 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
6034 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
6035 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
6036 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
6037 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
6038 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
6039 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
6040 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
6041 NEONMAP0(vrndi_v),
6042 NEONMAP0(vrndiq_v),
6043 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
6044 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
6045 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
6046 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
6047 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
6048 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
6049 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
6050 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
6051 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
6052 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
6053 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
6054 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
6055 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
6056 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
6057 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
6058 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
6059 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
6060 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
6061 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
6062 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
6063 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
6064 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
6065 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
6066 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
6067 NEONMAP0(vshl_n_v),
6068 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
6069 NEONMAP0(vshll_n_v),
6070 NEONMAP0(vshlq_n_v),
6071 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
6072 NEONMAP0(vshr_n_v),
6073 NEONMAP0(vshrn_n_v),
6074 NEONMAP0(vshrq_n_v),
6075 NEONMAP1(vst1_v, arm_neon_vst1, 0),
6076 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
6077 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
6078 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
6079 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
6080 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
6081 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
6082 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
6083 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
6084 NEONMAP1(vst2_v, arm_neon_vst2, 0),
6085 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
6086 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
6087 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
6088 NEONMAP1(vst3_v, arm_neon_vst3, 0),
6089 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
6090 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
6091 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
6092 NEONMAP1(vst4_v, arm_neon_vst4, 0),
6093 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
6094 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
6095 NEONMAP0(vsubhn_v),
6096 NEONMAP0(vtrn_v),
6097 NEONMAP0(vtrnq_v),
6098 NEONMAP0(vtst_v),
6099 NEONMAP0(vtstq_v),
6100 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
6101 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
6102 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
6103 NEONMAP0(vuzp_v),
6104 NEONMAP0(vuzpq_v),
6105 NEONMAP0(vzip_v),
6106 NEONMAP0(vzipq_v)
6107};
6108
6109static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
6110 NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0),
6111 NEONMAP0(splat_lane_v),
6112 NEONMAP0(splat_laneq_v),
6113 NEONMAP0(splatq_lane_v),
6114 NEONMAP0(splatq_laneq_v),
6115 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
6116 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
6117 NEONMAP0(vadd_v),
6118 NEONMAP0(vaddhn_v),
6119 NEONMAP0(vaddq_p128),
6120 NEONMAP0(vaddq_v),
6121 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
6122 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
6123 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
6124 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
6125 NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6126 NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6127 NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6128 NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6129 NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6130 NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6131 NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6132 NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6133 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
6134 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
6135 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
6136 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
6137 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
6138 NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
6139 NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
6140 NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
6141 NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
6142 NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
6143 NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
6144 NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
6145 NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
6146 NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
6147 NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
6148 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
6149 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
6150 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
6151 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
6152 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
6153 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
6154 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
6155 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
6156 NEONMAP0(vceqz_v),
6157 NEONMAP0(vceqzq_v),
6158 NEONMAP0(vcgez_v),
6159 NEONMAP0(vcgezq_v),
6160 NEONMAP0(vcgtz_v),
6161 NEONMAP0(vcgtzq_v),
6162 NEONMAP0(vclez_v),
6163 NEONMAP0(vclezq_v),
6164 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
6165 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
6166 NEONMAP0(vcltz_v),
6167 NEONMAP0(vcltzq_v),
6168 NEONMAP1(vclz_v, ctlz, Add1ArgType),
6169 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6170 NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
6171 NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
6172 NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
6173 NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
6174 NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
6175 NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
6176 NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
6177 NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
6178 NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
6179 NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
6180 NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
6181 NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
6182 NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
6183 NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
6184 NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
6185 NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
6186 NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
6187 NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
6188 NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
6189 NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
6190 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
6191 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
6192 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
6193 NEONMAP0(vcvt_f16_s16),
6194 NEONMAP0(vcvt_f16_u16),
6195 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
6196 NEONMAP0(vcvt_f32_v),
6197 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
6198 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
6199 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6200 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6201 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
6202 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
6203 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
6204 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
6205 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
6206 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
6207 NEONMAP0(vcvtq_f16_s16),
6208 NEONMAP0(vcvtq_f16_u16),
6209 NEONMAP0(vcvtq_f32_v),
6210 NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0),
6211 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
6212 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
6213 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6214 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6215 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
6216 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
6217 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
6218 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
6219 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
6220 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
6221 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
6222 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
6223 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
6224 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
6225 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
6226 NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6227 NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6228 NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6229 NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6230 NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6231 NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6232 NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6233 NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6234 NEONMAP0(vext_v),
6235 NEONMAP0(vextq_v),
6236 NEONMAP0(vfma_v),
6237 NEONMAP0(vfmaq_v),
6238 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
6239 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
6240 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
6241 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
6242 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
6243 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
6244 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
6245 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
6246 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
6247 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
6248 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
6249 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
6250 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
6251 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
6252 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
6253 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
6254 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
6255 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
6256 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
6257 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
6258 NEONMAP0(vmovl_v),
6259 NEONMAP0(vmovn_v),
6260 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
6261 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
6262 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
6263 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
6264 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
6265 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
6266 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
6267 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
6268 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
6269 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
6270 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
6271 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
6272 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
6273 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
6274 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
6275 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
6276 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
6277 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
6278 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
6279 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
6280 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
6281 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
6282 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
6283 NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
6284 NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
6285 NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
6286 NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
6287 NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
6288 NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
6289 NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
6290 NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
6291 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
6292 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
6293 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
6294 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
6295 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
6296 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
6297 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
6298 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
6299 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
6300 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
6301 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
6302 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
6303 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
6304 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
6305 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
6306 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
6307 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
6308 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
6309 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
6310 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
6311 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
6312 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
6313 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
6314 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
6315 NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
6316 NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
6317 NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
6318 NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
6319 NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
6320 NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
6321 NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
6322 NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
6323 NEONMAP0(vrndi_v),
6324 NEONMAP0(vrndiq_v),
6325 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
6326 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
6327 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
6328 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
6329 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
6330 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
6331 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
6332 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
6333 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
6334 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
6335 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
6336 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
6337 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
6338 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
6339 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
6340 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
6341 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
6342 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
6343 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
6344 NEONMAP0(vshl_n_v),
6345 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
6346 NEONMAP0(vshll_n_v),
6347 NEONMAP0(vshlq_n_v),
6348 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
6349 NEONMAP0(vshr_n_v),
6350 NEONMAP0(vshrn_n_v),
6351 NEONMAP0(vshrq_n_v),
6352 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
6353 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
6354 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
6355 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
6356 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
6357 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
6358 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
6359 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
6360 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
6361 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
6362 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
6363 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
6364 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
6365 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
6366 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
6367 NEONMAP0(vsubhn_v),
6368 NEONMAP0(vtst_v),
6369 NEONMAP0(vtstq_v),
6370 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
6371 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
6372 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
6373 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
6374};
6375
6376static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
6377 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
6378 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
6379 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
6380 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
6381 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
6382 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
6383 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
6384 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
6385 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
6386 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6387 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
6388 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
6389 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
6390 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
6391 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6392 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6393 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
6394 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
6395 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
6396 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
6397 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
6398 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
6399 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
6400 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
6401 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
6402 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
6403 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
6404 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
6405 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
6406 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
6407 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
6408 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
6409 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
6410 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
6411 NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
6412 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
6413 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
6414 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
6415 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
6416 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
6417 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
6418 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
6419 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
6420 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
6421 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
6422 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
6423 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
6424 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
6425 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
6426 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
6427 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
6428 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
6429 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
6430 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
6431 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
6432 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
6433 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
6434 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
6435 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
6436 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
6437 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
6438 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
6439 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
6440 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
6441 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
6442 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
6443 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
6444 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
6445 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
6446 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
6447 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
6448 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
6449 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
6450 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
6451 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
6452 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
6453 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
6454 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6455 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6456 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
6457 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
6458 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
6459 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
6460 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
6461 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
6462 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
6463 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
6464 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
6465 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
6466 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
6467 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
6468 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
6469 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
6470 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
6471 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
6472 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
6473 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
6474 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
6475 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
6476 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
6477 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
6478 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
6479 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
6480 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
6481 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
6482 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
6483 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
6484 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
6485 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
6486 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
6487 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
6488 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
6489 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
6490 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
6491 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
6492 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
6493 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
6494 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
6495 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
6496 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
6497 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
6498 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
6499 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
6500 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
6501 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
6502 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
6503 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
6504 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
6505 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
6506 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
6507 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
6508 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
6509 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
6510 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
6511 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
6512 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
6513 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
6514 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
6515 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
6516 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
6517 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
6518 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
6519 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
6520 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
6521 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
6522 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
6523 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
6524 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
6525 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
6526 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
6527 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
6528 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
6529 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
6530 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
6531 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
6532 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
6533 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
6534 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
6535 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
6536 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
6537 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
6538 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
6539 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
6540 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
6541 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
6542 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
6543 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
6544 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
6545 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
6546 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
6547 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
6548 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
6549 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
6550 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
6551 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
6552 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
6553 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
6554 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
6555 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
6556 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
6557 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
6558 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
6559 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
6560 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
6561 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
6562 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
6563 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
6564 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
6565 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
6566 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
6567 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
6568 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
6569 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
6570 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
6571 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
6572 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
6573 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
6574 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
6575 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
6576 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
6577 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
6578 // FP16 scalar intrinsics go here.
6579 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
6580 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
6581 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
6582 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
6583 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
6584 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
6585 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
6586 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
6587 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
6588 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
6589 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
6590 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
6591 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
6592 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
6593 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
6594 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
6595 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
6596 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
6597 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
6598 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
6599 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
6600 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
6601 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
6602 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
6603 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
6604 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
6605 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
6606 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
6607 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
6608 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
6609 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
6610 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
6611 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
6612 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
6613};
6614
6615// Some intrinsics are equivalent for codegen.
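// A minimal sketch of how this table is meant to be consumed (the lookup
// itself lives with the builtin emitters, not here): a matching first ID is
// rewritten to the second before the intrinsic maps above are consulted, so
// e.g. vabs_f16 shares the vabs_v codegen path.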
6616static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
6617 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
6618 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
6619 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
6620 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
6621 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
6622 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
6623 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
6624 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
6625 { NEON::BI__builtin_neon_vbsl_f16, NEON::BI__builtin_neon_vbsl_v, },
6626 { NEON::BI__builtin_neon_vbslq_f16, NEON::BI__builtin_neon_vbslq_v, },
6627 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
6628 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
6629 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
6630 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
6631 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
6632 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
6633 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
6634 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
6635 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
6636 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
6637 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
6638 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
6639 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
6640 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
6641 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
6642 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
6643 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
6644 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
6645 { NEON::BI__builtin_neon_vext_f16, NEON::BI__builtin_neon_vext_v, },
6646 { NEON::BI__builtin_neon_vextq_f16, NEON::BI__builtin_neon_vextq_v, },
6647 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
6648 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
6649 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
6650 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
6651 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
6652 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
6653 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
6654 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
6655 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
6656 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
6657 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
6658 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
6659 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
6660 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
6661 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
6662 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
6663 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
6664 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
6665 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
6666 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
6667 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
6668 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
6669 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
6670 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
6671 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
6672 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
6673 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
6674 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
6675 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
6676 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
6677 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
6678 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
6679 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
6680 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
6681 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
6682 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
6683 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
6684 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
6685 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
6686 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
6687 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
6688 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
6689 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
6690 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
6691 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
6692 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
6693 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
6694 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
6695 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
6696 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
6697 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
6698 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
6699 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
6700 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
6701 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
6702 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
6703 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
6704 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
6705 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
6706 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
6707 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
6708 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
6709 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
6710 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
6711 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
6712 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
6713 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
6714 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
6715 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
6716 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
6717 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
6718 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
6719 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
6720 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
6721 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
6722 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
6723 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
6724 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
6725 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
6726 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
6727 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
6728 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
6729 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
6730 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
6731 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
6732 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
6733 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
6734 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
6735 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
6736 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
6737 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
6738 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
6739 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
6740 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
6741 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
6742 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
6743 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
6744 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
6745 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
6746 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
6747 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
6748 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
6749 { NEON::BI__builtin_neon_vtrn_f16, NEON::BI__builtin_neon_vtrn_v, },
6750 { NEON::BI__builtin_neon_vtrnq_f16, NEON::BI__builtin_neon_vtrnq_v, },
6751 { NEON::BI__builtin_neon_vuzp_f16, NEON::BI__builtin_neon_vuzp_v, },
6752 { NEON::BI__builtin_neon_vuzpq_f16, NEON::BI__builtin_neon_vuzpq_v, },
6753 { NEON::BI__builtin_neon_vzip_f16, NEON::BI__builtin_neon_vzip_v, },
6754 { NEON::BI__builtin_neon_vzipq_f16, NEON::BI__builtin_neon_vzipq_v, },
6755 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
6756 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
6757 // arbitrary one to be handled as the canonical variation.
6758 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
6759 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
6760 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
6761 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
6762 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
6763 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
6764 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
6765 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
6766 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
6767 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
6768 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
6769 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
6770};
6771
6772#undef NEONMAP0
6773#undef NEONMAP1
6774#undef NEONMAP2
6775
6776#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
6777 { \
6778 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
6779 TypeModifier \
6780 }
6781
6782#define SVEMAP2(NameBase, TypeModifier) \
6783 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
6784static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
6785#define GET_SVE_LLVM_INTRINSIC_MAP
6786#include "clang/Basic/arm_sve_builtin_cg.inc"
6787#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
6788#undef GET_SVE_LLVM_INTRINSIC_MAP
6789};
6790
6791#undef SVEMAP1
6792#undef SVEMAP2
6793
6794#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
6795 { \
6796 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
6797 TypeModifier \
6798 }
6799
6800#define SMEMAP2(NameBase, TypeModifier) \
6801 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
6802static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
6803#define GET_SME_LLVM_INTRINSIC_MAP
6804#include "clang/Basic/arm_sme_builtin_cg.inc"
6805#undef GET_SME_LLVM_INTRINSIC_MAP
6806};
6807
6808#undef SMEMAP1
6809#undef SMEMAP2
6810
6811static bool NEONSIMDIntrinsicsProvenSorted = false;
6812
6813static bool AArch64SIMDIntrinsicsProvenSorted = false;
6814static bool AArch64SISDIntrinsicsProvenSorted = false;
6815static bool AArch64SVEIntrinsicsProvenSorted = false;
6816static bool AArch64SMEIntrinsicsProvenSorted = false;
6817
6818static const ARMVectorIntrinsicInfo *
6819findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
6820 unsigned BuiltinID, bool &MapProvenSorted) {
6821
6822#ifndef NDEBUG
6823 if (!MapProvenSorted) {
6824 assert(llvm::is_sorted(IntrinsicMap));
6825 MapProvenSorted = true;
6826 }
6827#endif
6828
6829 const ARMVectorIntrinsicInfo *Builtin =
6830 llvm::lower_bound(IntrinsicMap, BuiltinID);
6831
6832 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
6833 return Builtin;
6834
6835 return nullptr;
6836}
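// A typical lookup, as used by the NEON/SVE/SME emitters (illustrative):
//   findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
//                               AArch64SVEIntrinsicsProvenSorted)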
6837
6838Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
6839 unsigned Modifier,
6840 llvm::Type *ArgType,
6841 const CallExpr *E) {
6842 int VectorSize = 0;
6843 if (Modifier & Use64BitVectors)
6844 VectorSize = 64;
6845 else if (Modifier & Use128BitVectors)
6846 VectorSize = 128;
6847
6848 // Return type.
6849 SmallVector<llvm::Type *, 3> Tys;
6850 if (Modifier & AddRetType) {
6851 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6852 if (Modifier & VectorizeRetType)
6853 Ty = llvm::FixedVectorType::get(
6854 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
6855
6856 Tys.push_back(Ty);
6857 }
6858
6859 // Arguments.
6860 if (Modifier & VectorizeArgTypes) {
6861 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
6862 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
6863 }
6864
6865 if (Modifier & (Add1ArgType | Add2ArgTypes))
6866 Tys.push_back(ArgType);
6867
6868 if (Modifier & Add2ArgTypes)
6869 Tys.push_back(ArgType);
6870
6871 if (Modifier & InventFloatType)
6872 Tys.push_back(FloatTy);
6873
6874 return CGM.getIntrinsic(IntrinsicID, Tys);
6875}
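// Worked example (hypothetical modifier combination): with AddRetType |
// VectorizeRetType | Add1ArgType and Use128BitVectors, a builtin returning
// float with an i32 ArgType produces Tys = { <4 x float>, i32 }: the return
// type is widened to a 128-bit vector, while the argument type is passed
// through unvectorized because VectorizeArgTypes is not set.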
6876
6877static Value *EmitCommonNeonSISDBuiltinExpr(
6878 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
6879 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
6880 unsigned BuiltinID = SISDInfo.BuiltinID;
6881 unsigned int Int = SISDInfo.LLVMIntrinsic;
6882 unsigned Modifier = SISDInfo.TypeModifier;
6883 const char *s = SISDInfo.NameHint;
6884
6885 switch (BuiltinID) {
6886 case NEON::BI__builtin_neon_vcled_s64:
6887 case NEON::BI__builtin_neon_vcled_u64:
6888 case NEON::BI__builtin_neon_vcles_f32:
6889 case NEON::BI__builtin_neon_vcled_f64:
6890 case NEON::BI__builtin_neon_vcltd_s64:
6891 case NEON::BI__builtin_neon_vcltd_u64:
6892 case NEON::BI__builtin_neon_vclts_f32:
6893 case NEON::BI__builtin_neon_vcltd_f64:
6894 case NEON::BI__builtin_neon_vcales_f32:
6895 case NEON::BI__builtin_neon_vcaled_f64:
6896 case NEON::BI__builtin_neon_vcalts_f32:
6897 case NEON::BI__builtin_neon_vcaltd_f64:
6898 // Only one direction of these comparisons actually exists: cmle is a cmge
6899 // with swapped operands. The table gives us the right intrinsic, but we
6900 // still need to do the swap.
6901 std::swap(Ops[0], Ops[1]);
6902 break;
6903 }
6904
6905 assert(Int && "Generic code assumes a valid intrinsic");
6906
6907 // Determine the type(s) of this overloaded AArch64 intrinsic.
6908 const Expr *Arg = E->getArg(0);
6909 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
6910 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
6911
6912 int j = 0;
6913 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
6914 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
6915 ai != ae; ++ai, ++j) {
6916 llvm::Type *ArgTy = ai->getType();
6917 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
6918 ArgTy->getPrimitiveSizeInBits())
6919 continue;
6920
6921 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
6922 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
6923 // it before inserting.
6924 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
6925 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
6926 Ops[j] =
6927 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
6928 }
6929
6930 Value *Result = CGF.EmitNeonCall(F, Ops, s);
6931 llvm::Type *ResultType = CGF.ConvertType(E->getType());
6932 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
6933 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
6934 return CGF.Builder.CreateExtractElement(Result, C0);
6935
6936 return CGF.Builder.CreateBitCast(Result, ResultType, s);
6937}
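// For instance, if the chosen intrinsic expects a <4 x i16> argument but the
// builtin operand is a scalar i32 (as with the _n_ forms), the loop above
// truncates it to i16 and inserts it into lane 0 of a poison vector;
// symmetrically, a one-element vector result wider than the builtin's scalar
// result type is resolved by extracting lane 0.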
6938
6939Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
6940 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
6941 const char *NameHint, unsigned Modifier, const CallExpr *E,
6942 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
6943 llvm::Triple::ArchType Arch) {
6944 // Get the last argument, which specifies the vector type.
6945 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
6946 std::optional<llvm::APSInt> NeonTypeConst =
6947 Arg->getIntegerConstantExpr(getContext());
6948 if (!NeonTypeConst)
6949 return nullptr;
6950
6951 // Determine the type of this overloaded NEON intrinsic.
6952 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
6953 bool Usgn = Type.isUnsigned();
6954 bool Quad = Type.isQuad();
6955 const bool HasLegalHalfType = getTarget().hasLegalHalfType();
6956 const bool AllowBFloatArgsAndRet =
6957 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
6958
6959 llvm::FixedVectorType *VTy =
6960 GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet);
6961 llvm::Type *Ty = VTy;
6962 if (!Ty)
6963 return nullptr;
6964
6965 auto getAlignmentValue32 = [&](Address addr) -> Value* {
6966 return Builder.getInt32(addr.getAlignment().getQuantity());
6967 };
6968
6969 unsigned Int = LLVMIntrinsic;
6970 if ((Modifier & UnsignedAlts) && !Usgn)
6971 Int = AltLLVMIntrinsic;
6972
6973 switch (BuiltinID) {
6974 default: break;
6975 case NEON::BI__builtin_neon_splat_lane_v:
6976 case NEON::BI__builtin_neon_splat_laneq_v:
6977 case NEON::BI__builtin_neon_splatq_lane_v:
6978 case NEON::BI__builtin_neon_splatq_laneq_v: {
6979 auto NumElements = VTy->getElementCount();
6980 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
6981 NumElements = NumElements * 2;
6982 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
6983 NumElements = NumElements.divideCoefficientBy(2);
6984
6985 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
6986 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
6987 }
6988 case NEON::BI__builtin_neon_vpadd_v:
6989 case NEON::BI__builtin_neon_vpaddq_v:
6990 // We don't allow fp/int overloading of intrinsics.
6991 if (VTy->getElementType()->isFloatingPointTy() &&
6992 Int == Intrinsic::aarch64_neon_addp)
6993 Int = Intrinsic::aarch64_neon_faddp;
6994 break;
6995 case NEON::BI__builtin_neon_vabs_v:
6996 case NEON::BI__builtin_neon_vabsq_v:
6997 if (VTy->getElementType()->isFloatingPointTy())
6998 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
6999 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
7000 case NEON::BI__builtin_neon_vadd_v:
7001 case NEON::BI__builtin_neon_vaddq_v: {
7002 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
7003 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7004 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7005 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
7006 return Builder.CreateBitCast(Ops[0], Ty);
7007 }
7008 case NEON::BI__builtin_neon_vaddhn_v: {
7009 llvm::FixedVectorType *SrcTy =
7010 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7011
7012 // %sum = add <4 x i32> %lhs, %rhs
7013 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7014 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
7015 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
7016
7017 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
7018 Constant *ShiftAmt =
7019 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
7020 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
7021
7022 // %res = trunc <4 x i32> %high to <4 x i16>
7023 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
7024 }
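// Worked example for one lane of vaddhn_s32 on <4 x i32>:
//   0x12345678 + 0x11111111 = 0x23456789; lshr 16 gives 0x2345, and the
//   trunc keeps that high half of the full sum as the i16 result lane.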
7025 case NEON::BI__builtin_neon_vcale_v:
7026 case NEON::BI__builtin_neon_vcaleq_v:
7027 case NEON::BI__builtin_neon_vcalt_v:
7028 case NEON::BI__builtin_neon_vcaltq_v:
7029 std::swap(Ops[0], Ops[1]);
7030 [[fallthrough]];
7031 case NEON::BI__builtin_neon_vcage_v:
7032 case NEON::BI__builtin_neon_vcageq_v:
7033 case NEON::BI__builtin_neon_vcagt_v:
7034 case NEON::BI__builtin_neon_vcagtq_v: {
7035 llvm::Type *Ty;
7036 switch (VTy->getScalarSizeInBits()) {
7037 default: llvm_unreachable("unexpected type");
7038 case 32:
7039 Ty = FloatTy;
7040 break;
7041 case 64:
7042 Ty = DoubleTy;
7043 break;
7044 case 16:
7045 Ty = HalfTy;
7046 break;
7047 }
7048 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
7049 llvm::Type *Tys[] = { VTy, VecFlt };
7050 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7051 return EmitNeonCall(F, Ops, NameHint);
7052 }
7053 case NEON::BI__builtin_neon_vceqz_v:
7054 case NEON::BI__builtin_neon_vceqzq_v:
7055 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
7056 ICmpInst::ICMP_EQ, "vceqz");
7057 case NEON::BI__builtin_neon_vcgez_v:
7058 case NEON::BI__builtin_neon_vcgezq_v:
7059 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
7060 ICmpInst::ICMP_SGE, "vcgez");
7061 case NEON::BI__builtin_neon_vclez_v:
7062 case NEON::BI__builtin_neon_vclezq_v:
7063 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
7064 ICmpInst::ICMP_SLE, "vclez");
7065 case NEON::BI__builtin_neon_vcgtz_v:
7066 case NEON::BI__builtin_neon_vcgtzq_v:
7067 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
7068 ICmpInst::ICMP_SGT, "vcgtz");
7069 case NEON::BI__builtin_neon_vcltz_v:
7070 case NEON::BI__builtin_neon_vcltzq_v:
7071 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
7072 ICmpInst::ICMP_SLT, "vcltz");
7073 case NEON::BI__builtin_neon_vclz_v:
7074 case NEON::BI__builtin_neon_vclzq_v:
7075 // We generate a target-independent intrinsic, which needs a second argument
7076 // saying whether or not clz of zero is undefined; on ARM it isn't.
7077 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
7078 break;
7079 case NEON::BI__builtin_neon_vcvt_f32_v:
7080 case NEON::BI__builtin_neon_vcvtq_f32_v:
7081 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7082 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
7083 HasLegalHalfType);
7084 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7085 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7086 case NEON::BI__builtin_neon_vcvt_f16_s16:
7087 case NEON::BI__builtin_neon_vcvt_f16_u16:
7088 case NEON::BI__builtin_neon_vcvtq_f16_s16:
7089 case NEON::BI__builtin_neon_vcvtq_f16_u16:
7090 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7091 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
7092 HasLegalHalfType);
7093 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7094 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7095 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
7096 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
7097 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
7098 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
7099 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
7100 Function *F = CGM.getIntrinsic(Int, Tys);
7101 return EmitNeonCall(F, Ops, "vcvt_n");
7102 }
7103 case NEON::BI__builtin_neon_vcvt_n_f32_v:
7104 case NEON::BI__builtin_neon_vcvt_n_f64_v:
7105 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
7106 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
7107 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
7108 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
7109 Function *F = CGM.getIntrinsic(Int, Tys);
7110 return EmitNeonCall(F, Ops, "vcvt_n");
7111 }
7112 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
7113 case NEON::BI__builtin_neon_vcvt_n_s32_v:
7114 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
7115 case NEON::BI__builtin_neon_vcvt_n_u32_v:
7116 case NEON::BI__builtin_neon_vcvt_n_s64_v:
7117 case NEON::BI__builtin_neon_vcvt_n_u64_v:
7118 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
7119 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
7120 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
7121 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
7122 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
7123 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
7124 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7125 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7126 return EmitNeonCall(F, Ops, "vcvt_n");
7127 }
7128 case NEON::BI__builtin_neon_vcvt_s32_v:
7129 case NEON::BI__builtin_neon_vcvt_u32_v:
7130 case NEON::BI__builtin_neon_vcvt_s64_v:
7131 case NEON::BI__builtin_neon_vcvt_u64_v:
7132 case NEON::BI__builtin_neon_vcvt_s16_f16:
7133 case NEON::BI__builtin_neon_vcvt_u16_f16:
7134 case NEON::BI__builtin_neon_vcvtq_s32_v:
7135 case NEON::BI__builtin_neon_vcvtq_u32_v:
7136 case NEON::BI__builtin_neon_vcvtq_s64_v:
7137 case NEON::BI__builtin_neon_vcvtq_u64_v:
7138 case NEON::BI__builtin_neon_vcvtq_s16_f16:
7139 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
7140 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
7141 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
7142 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
7143 }
7144 case NEON::BI__builtin_neon_vcvta_s16_f16:
7145 case NEON::BI__builtin_neon_vcvta_s32_v:
7146 case NEON::BI__builtin_neon_vcvta_s64_v:
7147 case NEON::BI__builtin_neon_vcvta_u16_f16:
7148 case NEON::BI__builtin_neon_vcvta_u32_v:
7149 case NEON::BI__builtin_neon_vcvta_u64_v:
7150 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
7151 case NEON::BI__builtin_neon_vcvtaq_s32_v:
7152 case NEON::BI__builtin_neon_vcvtaq_s64_v:
7153 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
7154 case NEON::BI__builtin_neon_vcvtaq_u32_v:
7155 case NEON::BI__builtin_neon_vcvtaq_u64_v:
7156 case NEON::BI__builtin_neon_vcvtn_s16_f16:
7157 case NEON::BI__builtin_neon_vcvtn_s32_v:
7158 case NEON::BI__builtin_neon_vcvtn_s64_v:
7159 case NEON::BI__builtin_neon_vcvtn_u16_f16:
7160 case NEON::BI__builtin_neon_vcvtn_u32_v:
7161 case NEON::BI__builtin_neon_vcvtn_u64_v:
7162 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
7163 case NEON::BI__builtin_neon_vcvtnq_s32_v:
7164 case NEON::BI__builtin_neon_vcvtnq_s64_v:
7165 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
7166 case NEON::BI__builtin_neon_vcvtnq_u32_v:
7167 case NEON::BI__builtin_neon_vcvtnq_u64_v:
7168 case NEON::BI__builtin_neon_vcvtp_s16_f16:
7169 case NEON::BI__builtin_neon_vcvtp_s32_v:
7170 case NEON::BI__builtin_neon_vcvtp_s64_v:
7171 case NEON::BI__builtin_neon_vcvtp_u16_f16:
7172 case NEON::BI__builtin_neon_vcvtp_u32_v:
7173 case NEON::BI__builtin_neon_vcvtp_u64_v:
7174 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
7175 case NEON::BI__builtin_neon_vcvtpq_s32_v:
7176 case NEON::BI__builtin_neon_vcvtpq_s64_v:
7177 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
7178 case NEON::BI__builtin_neon_vcvtpq_u32_v:
7179 case NEON::BI__builtin_neon_vcvtpq_u64_v:
7180 case NEON::BI__builtin_neon_vcvtm_s16_f16:
7181 case NEON::BI__builtin_neon_vcvtm_s32_v:
7182 case NEON::BI__builtin_neon_vcvtm_s64_v:
7183 case NEON::BI__builtin_neon_vcvtm_u16_f16:
7184 case NEON::BI__builtin_neon_vcvtm_u32_v:
7185 case NEON::BI__builtin_neon_vcvtm_u64_v:
7186 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
7187 case NEON::BI__builtin_neon_vcvtmq_s32_v:
7188 case NEON::BI__builtin_neon_vcvtmq_s64_v:
7189 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
7190 case NEON::BI__builtin_neon_vcvtmq_u32_v:
7191 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
7192 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7193 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
7194 }
7195 case NEON::BI__builtin_neon_vcvtx_f32_v: {
7196 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
7197 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
7198
7199 }
7200 case NEON::BI__builtin_neon_vext_v:
7201 case NEON::BI__builtin_neon_vextq_v: {
7202 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
7203 SmallVector<int, 16> Indices;
7204 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7205 Indices.push_back(i+CV);
7206
7207 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7208 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7209 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
7210 }
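// For example, vext with CV == 3 on two <8 x i8> vectors a and b yields
// shuffle indices 3..10, i.e. { a[3..7], b[0..2] }: the concatenation of a
// and b shifted down by three lanes.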
7211 case NEON::BI__builtin_neon_vfma_v:
7212 case NEON::BI__builtin_neon_vfmaq_v: {
7213 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7214 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7215 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7216
7217 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
7218 return emitCallMaybeConstrainedFPBuiltin(
7219 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
7220 {Ops[1], Ops[2], Ops[0]});
7221 }
7222 case NEON::BI__builtin_neon_vld1_v:
7223 case NEON::BI__builtin_neon_vld1q_v: {
7224 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7225 Ops.push_back(getAlignmentValue32(PtrOp0));
7226 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
7227 }
7228 case NEON::BI__builtin_neon_vld1_x2_v:
7229 case NEON::BI__builtin_neon_vld1q_x2_v:
7230 case NEON::BI__builtin_neon_vld1_x3_v:
7231 case NEON::BI__builtin_neon_vld1q_x3_v:
7232 case NEON::BI__builtin_neon_vld1_x4_v:
7233 case NEON::BI__builtin_neon_vld1q_x4_v: {
7234 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getElementType());
7235 Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
7236 llvm::Type *Tys[2] = { VTy, PTy };
7237 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7238 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
7239 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
7240 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7241 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7242 }
7243 case NEON::BI__builtin_neon_vld2_v:
7244 case NEON::BI__builtin_neon_vld2q_v:
7245 case NEON::BI__builtin_neon_vld3_v:
7246 case NEON::BI__builtin_neon_vld3q_v:
7247 case NEON::BI__builtin_neon_vld4_v:
7248 case NEON::BI__builtin_neon_vld4q_v:
7249 case NEON::BI__builtin_neon_vld2_dup_v:
7250 case NEON::BI__builtin_neon_vld2q_dup_v:
7251 case NEON::BI__builtin_neon_vld3_dup_v:
7252 case NEON::BI__builtin_neon_vld3q_dup_v:
7253 case NEON::BI__builtin_neon_vld4_dup_v:
7254 case NEON::BI__builtin_neon_vld4q_dup_v: {
7255 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7256 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7257 Value *Align = getAlignmentValue32(PtrOp1);
7258 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
7259 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
7260 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7261 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7262 }
7263 case NEON::BI__builtin_neon_vld1_dup_v:
7264 case NEON::BI__builtin_neon_vld1q_dup_v: {
7265 Value *V = PoisonValue::get(Ty);
7266 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
7267 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
7268 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
7269 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
7270 return EmitNeonSplat(Ops[0], CI);
7271 }
7272 case NEON::BI__builtin_neon_vld2_lane_v:
7273 case NEON::BI__builtin_neon_vld2q_lane_v:
7274 case NEON::BI__builtin_neon_vld3_lane_v:
7275 case NEON::BI__builtin_neon_vld3q_lane_v:
7276 case NEON::BI__builtin_neon_vld4_lane_v:
7277 case NEON::BI__builtin_neon_vld4q_lane_v: {
7278 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7279 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7280 for (unsigned I = 2; I < Ops.size() - 1; ++I)
7281 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
7282 Ops.push_back(getAlignmentValue32(PtrOp1));
7283 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
7284 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
7285 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7286 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7287 }
7288 case NEON::BI__builtin_neon_vmovl_v: {
7289 llvm::FixedVectorType *DTy =
7290 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
7291 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
7292 if (Usgn)
7293 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
7294 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
7295 }
7296 case NEON::BI__builtin_neon_vmovn_v: {
7297 llvm::FixedVectorType *QTy =
7298 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7299 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
7300 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
7301 }
7302 case NEON::BI__builtin_neon_vmull_v:
7303 // FIXME: the integer vmull operations could be emitted in terms of pure
7304 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
7305 // hoisting the exts outside loops. Until global ISel comes along that can
7306 // see through such movement, this leads to bad CodeGen. So we need an
7307 // intrinsic for now.
7308 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
7309 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
7310 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
7311 case NEON::BI__builtin_neon_vpadal_v:
7312 case NEON::BI__builtin_neon_vpadalq_v: {
7313 // The source operand type has twice as many elements of half the size.
7314 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
7315 llvm::Type *EltTy =
7316 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
7317 auto *NarrowTy =
7318 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
7319 llvm::Type *Tys[2] = { Ty, NarrowTy };
7320 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
7321 }
7322 case NEON::BI__builtin_neon_vpaddl_v:
7323 case NEON::BI__builtin_neon_vpaddlq_v: {
7324 // The source operand type has twice as many elements of half the size.
7325 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
7326 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
7327 auto *NarrowTy =
7328 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
7329 llvm::Type *Tys[2] = { Ty, NarrowTy };
7330 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
7331 }
7332 case NEON::BI__builtin_neon_vqdmlal_v:
7333 case NEON::BI__builtin_neon_vqdmlsl_v: {
7334 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
7335 Ops[1] =
7336 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
7337 Ops.resize(2);
7338 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
7339 }
7340 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
7341 case NEON::BI__builtin_neon_vqdmulh_lane_v:
7342 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
7343 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
7344 auto *RTy = cast<llvm::FixedVectorType>(Ty);
7345 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
7346 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
7347 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
7348 RTy->getNumElements() * 2);
7349 llvm::Type *Tys[2] = {
7350 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7351 /*isQuad*/ false))};
7352 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
7353 }
7354 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
7355 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
7356 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
7357 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
7358 llvm::Type *Tys[2] = {
7359 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7360 /*isQuad*/ true))};
7361 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
7362 }
7363 case NEON::BI__builtin_neon_vqshl_n_v:
7364 case NEON::BI__builtin_neon_vqshlq_n_v:
7365 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
7366 1, false);
7367 case NEON::BI__builtin_neon_vqshlu_n_v:
7368 case NEON::BI__builtin_neon_vqshluq_n_v:
7369 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
7370 1, false);
7371 case NEON::BI__builtin_neon_vrecpe_v:
7372 case NEON::BI__builtin_neon_vrecpeq_v:
7373 case NEON::BI__builtin_neon_vrsqrte_v:
7374 case NEON::BI__builtin_neon_vrsqrteq_v:
7375 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
7376 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
7377 case NEON::BI__builtin_neon_vrndi_v:
7378 case NEON::BI__builtin_neon_vrndiq_v:
7379 Int = Builder.getIsFPConstrained()
7380 ? Intrinsic::experimental_constrained_nearbyint
7381 : Intrinsic::nearbyint;
7382 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
7383 case NEON::BI__builtin_neon_vrshr_n_v:
7384 case NEON::BI__builtin_neon_vrshrq_n_v:
7385 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
7386 1, true);
7387 case NEON::BI__builtin_neon_vsha512hq_u64:
7388 case NEON::BI__builtin_neon_vsha512h2q_u64:
7389 case NEON::BI__builtin_neon_vsha512su0q_u64:
7390 case NEON::BI__builtin_neon_vsha512su1q_u64: {
7391 Function *F = CGM.getIntrinsic(Int);
7392 return EmitNeonCall(F, Ops, "");
7393 }
7394 case NEON::BI__builtin_neon_vshl_n_v:
7395 case NEON::BI__builtin_neon_vshlq_n_v:
7396 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
7397 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
7398 "vshl_n");
7399 case NEON::BI__builtin_neon_vshll_n_v: {
7400 llvm::FixedVectorType *SrcTy =
7401 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
7402 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7403 if (Usgn)
7404 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
7405 else
7406 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
7407 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
7408 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
7409 }
7410 case NEON::BI__builtin_neon_vshrn_n_v: {
7411 llvm::FixedVectorType *SrcTy =
7412 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7413 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7414 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
7415 if (Usgn)
7416 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
7417 else
7418 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
7419 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
7420 }
7421 case NEON::BI__builtin_neon_vshr_n_v:
7422 case NEON::BI__builtin_neon_vshrq_n_v:
7423 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
7424 case NEON::BI__builtin_neon_vst1_v:
7425 case NEON::BI__builtin_neon_vst1q_v:
7426 case NEON::BI__builtin_neon_vst2_v:
7427 case NEON::BI__builtin_neon_vst2q_v:
7428 case NEON::BI__builtin_neon_vst3_v:
7429 case NEON::BI__builtin_neon_vst3q_v:
7430 case NEON::BI__builtin_neon_vst4_v:
7431 case NEON::BI__builtin_neon_vst4q_v:
7432 case NEON::BI__builtin_neon_vst2_lane_v:
7433 case NEON::BI__builtin_neon_vst2q_lane_v:
7434 case NEON::BI__builtin_neon_vst3_lane_v:
7435 case NEON::BI__builtin_neon_vst3q_lane_v:
7436 case NEON::BI__builtin_neon_vst4_lane_v:
7437 case NEON::BI__builtin_neon_vst4q_lane_v: {
7438 llvm::Type *Tys[] = {Int8PtrTy, Ty};
7439 Ops.push_back(getAlignmentValue32(PtrOp0));
7440 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
7441 }
7442 case NEON::BI__builtin_neon_vsm3partw1q_u32:
7443 case NEON::BI__builtin_neon_vsm3partw2q_u32:
7444 case NEON::BI__builtin_neon_vsm3ss1q_u32:
7445 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
7446 case NEON::BI__builtin_neon_vsm4eq_u32: {
7447 Function *F = CGM.getIntrinsic(Int);
7448 return EmitNeonCall(F, Ops, "");
7449 }
7450 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
7451 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
7452 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
7453 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
7454 Function *F = CGM.getIntrinsic(Int);
7455 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7456 return EmitNeonCall(F, Ops, "");
7457 }
7458 case NEON::BI__builtin_neon_vst1_x2_v:
7459 case NEON::BI__builtin_neon_vst1q_x2_v:
7460 case NEON::BI__builtin_neon_vst1_x3_v:
7461 case NEON::BI__builtin_neon_vst1q_x3_v:
7462 case NEON::BI__builtin_neon_vst1_x4_v:
7463 case NEON::BI__builtin_neon_vst1q_x4_v: {
7464 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getElementType());
7465 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
7466 // in AArch64 it comes last. We may want to stick to one or the other.
7467 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
7468 Arch == llvm::Triple::aarch64_32) {
7469 llvm::Type *Tys[2] = { VTy, PTy };
7470 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7471 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
7472 }
7473 llvm::Type *Tys[2] = { PTy, VTy };
7474 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
7475 }
7476 case NEON::BI__builtin_neon_vsubhn_v: {
7477 llvm::FixedVectorType *SrcTy =
7478 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7479
7480 // %diff = sub <4 x i32> %lhs, %rhs
7481 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7482 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
7483 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
7484
7485 // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
7486 Constant *ShiftAmt =
7487 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
7488 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
7489
7490 // %res = trunc <4 x i32> %high to <4 x i16>
7491 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
7492 }
7493 case NEON::BI__builtin_neon_vtrn_v:
7494 case NEON::BI__builtin_neon_vtrnq_v: {
7495 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7496 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7497 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7498 Value *SV = nullptr;
7499
7500 for (unsigned vi = 0; vi != 2; ++vi) {
7501 SmallVector<int, 16> Indices;
7502 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7503 Indices.push_back(i+vi);
7504 Indices.push_back(i+e+vi);
7505 }
7506 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7507 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
7508 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7509 }
7510 return SV;
7511 }
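// For a <4 x i16> input this emits the two transpose halves:
//   vi == 0: indices {0, 4, 2, 6} -> { a0, b0, a2, b2 }  (TRN1)
//   vi == 1: indices {1, 5, 3, 7} -> { a1, b1, a3, b3 }  (TRN2)
// each stored to consecutive result slots through Ops[0].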
7512 case NEON::BI__builtin_neon_vtst_v:
7513 case NEON::BI__builtin_neon_vtstq_v: {
7514 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7515 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7516 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
7517 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
7518 ConstantAggregateZero::get(Ty));
7519 return Builder.CreateSExt(Ops[0], Ty, "vtst");
7520 }
7521 case NEON::BI__builtin_neon_vuzp_v:
7522 case NEON::BI__builtin_neon_vuzpq_v: {
7523 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7524 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7525 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7526 Value *SV = nullptr;
7527
7528 for (unsigned vi = 0; vi != 2; ++vi) {
7529 SmallVector<int, 16> Indices;
7530 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7531 Indices.push_back(2*i+vi);
7532
7533 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7534 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
7535 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7536 }
7537 return SV;
7538 }
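// For a <4 x i16> input this emits the two unzip halves:
//   vi == 0: indices {0, 2, 4, 6} -> { a0, a2, b0, b2 }  (UZP1, even lanes)
//   vi == 1: indices {1, 3, 5, 7} -> { a1, a3, b1, b3 }  (UZP2, odd lanes)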
7539 case NEON::BI__builtin_neon_vxarq_u64: {
7540 Function *F = CGM.getIntrinsic(Int);
7541 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
7542 return EmitNeonCall(F, Ops, "");
7543 }
7544 case NEON::BI__builtin_neon_vzip_v:
7545 case NEON::BI__builtin_neon_vzipq_v: {
7546 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7547 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7548 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7549 Value *SV = nullptr;
7550
7551 for (unsigned vi = 0; vi != 2; ++vi) {
7552 SmallVector<int, 16> Indices;
7553 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7554 Indices.push_back((i + vi*e) >> 1);
7555 Indices.push_back(((i + vi*e) >> 1)+e);
7556 }
7557 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7558 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
7559 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7560 }
7561 return SV;
7562 }
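// For a <4 x i16> input this emits the two zip halves:
//   vi == 0: indices {0, 4, 1, 5} -> { a0, b0, a1, b1 }  (ZIP1)
//   vi == 1: indices {2, 6, 3, 7} -> { a2, b2, a3, b3 }  (ZIP2)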
7563 case NEON::BI__builtin_neon_vdot_s32:
7564 case NEON::BI__builtin_neon_vdot_u32:
7565 case NEON::BI__builtin_neon_vdotq_s32:
7566 case NEON::BI__builtin_neon_vdotq_u32: {
7567 auto *InputTy =
7568 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
7569 llvm::Type *Tys[2] = { Ty, InputTy };
7570 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
7571 }
7572 case NEON::BI__builtin_neon_vfmlal_low_f16:
7573 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
7574 auto *InputTy =
7575 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
7576 llvm::Type *Tys[2] = { Ty, InputTy };
7577 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
7578 }
7579 case NEON::BI__builtin_neon_vfmlsl_low_f16:
7580 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
7581 auto *InputTy =
7582 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
7583 llvm::Type *Tys[2] = { Ty, InputTy };
7584 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
7585 }
7586 case NEON::BI__builtin_neon_vfmlal_high_f16:
7587 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
7588 auto *InputTy =
7589 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
7590 llvm::Type *Tys[2] = { Ty, InputTy };
7591 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
7592 }
7593 case NEON::BI__builtin_neon_vfmlsl_high_f16:
7594 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
7595 auto *InputTy =
7596 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
7597 llvm::Type *Tys[2] = { Ty, InputTy };
7598 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
7599 }
7600 case NEON::BI__builtin_neon_vmmlaq_s32:
7601 case NEON::BI__builtin_neon_vmmlaq_u32: {
7602 auto *InputTy =
7603 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
7604 llvm::Type *Tys[2] = { Ty, InputTy };
7605 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
7606 }
7607 case NEON::BI__builtin_neon_vusmmlaq_s32: {
7608 auto *InputTy =
7609 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
7610 llvm::Type *Tys[2] = { Ty, InputTy };
7611 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
7612 }
7613 case NEON::BI__builtin_neon_vusdot_s32:
7614 case NEON::BI__builtin_neon_vusdotq_s32: {
7615 auto *InputTy =
7616 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
7617 llvm::Type *Tys[2] = { Ty, InputTy };
7618 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
7619 }
7620 case NEON::BI__builtin_neon_vbfdot_f32:
7621 case NEON::BI__builtin_neon_vbfdotq_f32: {
7622 llvm::Type *InputTy =
7623 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
7624 llvm::Type *Tys[2] = { Ty, InputTy };
7625 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
7626 }
7627 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
7628 llvm::Type *Tys[1] = { Ty };
7629 Function *F = CGM.getIntrinsic(Int, Tys);
7630 return EmitNeonCall(F, Ops, "vcvtfp2bf");
7631 }
7632
7633 }
7634
7635 assert(Int && "Expected valid intrinsic number");
7636
7637 // Determine the type(s) of this overloaded AArch64 intrinsic.
7638 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
7639
7640 Value *Result = EmitNeonCall(F, Ops, NameHint);
7641 llvm::Type *ResultType = ConvertType(E->getType());
7642 // Cast the AArch64 intrinsic's one-element vector result back to the
7643 // scalar type expected by the builtin.
7644 return Builder.CreateBitCast(Result, ResultType, NameHint);
7645}
7646
7647Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
7648 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
7649 const CmpInst::Predicate Ip, const Twine &Name) {
7650 llvm::Type *OTy = Op->getType();
7651
7652 // FIXME: this is utterly horrific. We should not be looking at previous
7653 // codegen context to find out what needs doing. Unfortunately TableGen
7654 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
7655 // (etc).
7656 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
7657 OTy = BI->getOperand(0)->getType();
7658
7659 Op = Builder.CreateBitCast(Op, OTy);
7660 if (OTy->getScalarType()->isFloatingPointTy()) {
7661 if (Fp == CmpInst::FCMP_OEQ)
7662 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
7663 else
7664 Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));
7665 } else {
7666 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
7667 }
7668 return Builder.CreateSExt(Op, Ty, Name);
7669}
7670
7671static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
7672 Value *ExtOp, Value *IndexOp,
7673 llvm::Type *ResTy, unsigned IntID,
7674 const char *Name) {
7675 SmallVector<Value *, 2> TblOps;
7676 if (ExtOp)
7677 TblOps.push_back(ExtOp);
7678
7679 // Build a vector containing sequential numbers like (0, 1, 2, ..., 15)
7680 SmallVector<int, 16> Indices;
7681 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
7682 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
7683 Indices.push_back(2*i);
7684 Indices.push_back(2*i+1);
7685 }
7686
7687 int PairPos = 0, End = Ops.size() - 1;
7688 while (PairPos < End) {
7689 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
7690 Ops[PairPos+1], Indices,
7691 Name));
7692 PairPos += 2;
7693 }
7694
7695 // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
7696 // of the last 128-bit lookup table with zero.
7697 if (PairPos == End) {
7698 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
7699 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
7700 ZeroTbl, Indices, Name));
7701 }
7702
7703 Function *TblF;
7704 TblOps.push_back(IndexOp);
7705 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
7706
7707 return CGF.EmitNeonCall(TblF, TblOps, Name);
7708}
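// For example, packing three 64-bit tables {t0, t1, t2} for a TBL3 lookup
// yields TblOps = { concat(t0, t1), concat(t2, zero) }: complete pairs are
// shuffled together and the odd table out is padded with a zero upper half.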
7709
7710Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
7711 unsigned Value;
7712 switch (BuiltinID) {
7713 default:
7714 return nullptr;
7715 case clang::ARM::BI__builtin_arm_nop:
7716 Value = 0;
7717 break;
7718 case clang::ARM::BI__builtin_arm_yield:
7719 case clang::ARM::BI__yield:
7720 Value = 1;
7721 break;
7722 case clang::ARM::BI__builtin_arm_wfe:
7723 case clang::ARM::BI__wfe:
7724 Value = 2;
7725 break;
7726 case clang::ARM::BI__builtin_arm_wfi:
7727 case clang::ARM::BI__wfi:
7728 Value = 3;
7729 break;
7730 case clang::ARM::BI__builtin_arm_sev:
7731 case clang::ARM::BI__sev:
7732 Value = 4;
7733 break;
7734 case clang::ARM::BI__builtin_arm_sevl:
7735 case clang::ARM::BI__sevl:
7736 Value = 5;
7737 break;
7738 }
7739
7740 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
7741 llvm::ConstantInt::get(Int32Ty, Value));
7742}
7743
7744enum SpecialRegisterAccessKind {
7745 NormalRead,
7746 VolatileRead,
7747 Write,
7748};
7749
7750// Generates the IR for the read/write special register builtin.
7751// ValueType is the type of the value that is to be written or read, and
7752// RegisterType is the type of the register being written to or read from.
7753static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
7754 const CallExpr *E,
7755 llvm::Type *RegisterType,
7756 llvm::Type *ValueType,
7757 SpecialRegisterAccessKind AccessKind,
7758 StringRef SysReg = "") {
7759 // The read/write register intrinsics only support 32-, 64- and 128-bit operations.
7760 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
7761 RegisterType->isIntegerTy(128)) &&
7762 "Unsupported size for register.");
7763
7764 CodeGen::CGBuilderTy &Builder = CGF.Builder;
7765 CodeGen::CodeGenModule &CGM = CGF.CGM;
7766 LLVMContext &Context = CGM.getLLVMContext();
7767
7768 if (SysReg.empty()) {
7769 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
7770 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
7771 }
7772
7773 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
7774 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
7775 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
7776
7777 llvm::Type *Types[] = { RegisterType };
7778
7779 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
7780 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
7781 && "Can't fit 64-bit value in 32-bit register");
7782
7783 if (AccessKind != Write) {
7784 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
7785 llvm::Function *F = CGM.getIntrinsic(
7786 AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register
7787 : llvm::Intrinsic::read_register,
7788 Types);
7789 llvm::Value *Call = Builder.CreateCall(F, Metadata);
7790
7791 if (MixedTypes)
7792 // Read into a 64-bit register and then truncate the result to 32 bits.
7793 return Builder.CreateTrunc(Call, ValueType);
7794
7795 if (ValueType->isPointerTy())
7796 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
7797 return Builder.CreateIntToPtr(Call, ValueType);
7798
7799 return Call;
7800 }
7801
7802 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
7803 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
7804 if (MixedTypes) {
7805 // Extend the 32-bit write value to 64 bits to pass to the write intrinsic.
7806 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
7807 return Builder.CreateCall(F, { Metadata, ArgValue });
7808 }
7809
7810 if (ValueType->isPointerTy()) {
7811 // Have VoidPtrTy ArgValue but want to return an i32/i64.
7812 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
7813 return Builder.CreateCall(F, { Metadata, ArgValue });
7814 }
7815
7816 return Builder.CreateCall(F, { Metadata, ArgValue });
7817}
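// As an illustration, a volatile 32-bit read of a register named "cpsr"
// would emit IR along the lines of:
//   %v = call i32 @llvm.read_volatile_register.i32(metadata !{!"cpsr"})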
7818
7819/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
7820/// argument that specifies the vector type.
7821static bool HasExtraNeonArgument(unsigned BuiltinID) {
7822 switch (BuiltinID) {
7823 default: break;
7824 case NEON::BI__builtin_neon_vget_lane_i8:
7825 case NEON::BI__builtin_neon_vget_lane_i16:
7826 case NEON::BI__builtin_neon_vget_lane_bf16:
7827 case NEON::BI__builtin_neon_vget_lane_i32:
7828 case NEON::BI__builtin_neon_vget_lane_i64:
7829 case NEON::BI__builtin_neon_vget_lane_f32:
7830 case NEON::BI__builtin_neon_vgetq_lane_i8:
7831 case NEON::BI__builtin_neon_vgetq_lane_i16:
7832 case NEON::BI__builtin_neon_vgetq_lane_bf16:
7833 case NEON::BI__builtin_neon_vgetq_lane_i32:
7834 case NEON::BI__builtin_neon_vgetq_lane_i64:
7835 case NEON::BI__builtin_neon_vgetq_lane_f32:
7836 case NEON::BI__builtin_neon_vduph_lane_bf16:
7837 case NEON::BI__builtin_neon_vduph_laneq_bf16:
7838 case NEON::BI__builtin_neon_vset_lane_i8:
7839 case NEON::BI__builtin_neon_vset_lane_i16:
7840 case NEON::BI__builtin_neon_vset_lane_bf16:
7841 case NEON::BI__builtin_neon_vset_lane_i32:
7842 case NEON::BI__builtin_neon_vset_lane_i64:
7843 case NEON::BI__builtin_neon_vset_lane_f32:
7844 case NEON::BI__builtin_neon_vsetq_lane_i8:
7845 case NEON::BI__builtin_neon_vsetq_lane_i16:
7846 case NEON::BI__builtin_neon_vsetq_lane_bf16:
7847 case NEON::BI__builtin_neon_vsetq_lane_i32:
7848 case NEON::BI__builtin_neon_vsetq_lane_i64:
7849 case NEON::BI__builtin_neon_vsetq_lane_f32:
7850 case NEON::BI__builtin_neon_vsha1h_u32:
7851 case NEON::BI__builtin_neon_vsha1cq_u32:
7852 case NEON::BI__builtin_neon_vsha1pq_u32:
7853 case NEON::BI__builtin_neon_vsha1mq_u32:
7854 case NEON::BI__builtin_neon_vcvth_bf16_f32:
7855 case clang::ARM::BI_MoveToCoprocessor:
7856 case clang::ARM::BI_MoveToCoprocessor2:
7857 return false;
7858 }
7859 return true;
7860}
7861
7862Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
7863 const CallExpr *E,
7864 ReturnValueSlot ReturnValue,
7865 llvm::Triple::ArchType Arch) {
7866 if (auto Hint = GetValueForARMHint(BuiltinID))
7867 return Hint;
7868
7869 if (BuiltinID == clang::ARM::BI__emit) {
7870 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
7871 llvm::FunctionType *FTy =
7872 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
7873
7874 Expr::EvalResult Result;
7875 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
7876 llvm_unreachable("Sema will ensure that the parameter is constant");
7877
7878 llvm::APSInt Value = Result.Val.getInt();
7879 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
7880
7881 llvm::InlineAsm *Emit =
7882 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
7883 /*hasSideEffects=*/true)
7884 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
7885 /*hasSideEffects=*/true);
7886
7887 return Builder.CreateCall(Emit);
7888 }
7889
7890 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
7891 Value *Option = EmitScalarExpr(E->getArg(0));
7892 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
7893 }
7894
7895 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
7896 Value *Address = EmitScalarExpr(E->getArg(0));
7897 Value *RW = EmitScalarExpr(E->getArg(1));
7898 Value *IsData = EmitScalarExpr(E->getArg(2));
7899
7900 // Locality is not supported on the ARM target
7901 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
7902
7903 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
7904 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
7905 }
7906
7907 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
7908 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
7909 return Builder.CreateCall(
7910 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
7911 }
7912
7913 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
7914 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
7915 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
7916 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
7917 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
7918 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
7919 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
7920 return Res;
7921 }
7922
7923
7924 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
7925 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
7926 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
7927 }
7928 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
7929 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
7930 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
7931 "cls");
7932 }
7933
7934 if (BuiltinID == clang::ARM::BI__clear_cache) {
7935 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
7936 const FunctionDecl *FD = E->getDirectCallee();
7937 Value *Ops[2];
7938 for (unsigned i = 0; i < 2; i++)
7939 Ops[i] = EmitScalarExpr(E->getArg(i));
7940 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
7941 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
7942 StringRef Name = FD->getName();
7943 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
7944 }
7945
7946 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
7947 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
7948 Function *F;
7949
7950 switch (BuiltinID) {
7951 default: llvm_unreachable("unexpected builtin");
7952 case clang::ARM::BI__builtin_arm_mcrr:
7953 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
7954 break;
7955 case clang::ARM::BI__builtin_arm_mcrr2:
7956 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
7957 break;
7958 }
7959
7960 // The MCRR{2} instruction has 5 operands, but
7961 // the intrinsic has only 4 because Rt and Rt2
7962 // are represented as a single unsigned 64-bit
7963 // integer in the intrinsic definition, even
7964 // though the instruction encodes them as two
7965 // 32-bit registers.
7966
7967 Value *Coproc = EmitScalarExpr(E->getArg(0));
7968 Value *Opc1 = EmitScalarExpr(E->getArg(1));
7969 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
7970 Value *CRm = EmitScalarExpr(E->getArg(3));
7971
7972 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
7973 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
7974 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
7975 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
7976
7977 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
7978 }
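// E.g. RtAndRt2 == 0x1122334455667788 splits into Rt == 0x55667788 (low
// word, via trunc) and Rt2 == 0x11223344 (high word, via lshr 32 + trunc).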
7979
7980 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
7981 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
7982 Function *F;
7983
7984 switch (BuiltinID) {
7985 default: llvm_unreachable("unexpected builtin");
7986 case clang::ARM::BI__builtin_arm_mrrc:
7987 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
7988 break;
7989 case clang::ARM::BI__builtin_arm_mrrc2:
7990 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
7991 break;
7992 }
7993
7994 Value *Coproc = EmitScalarExpr(E->getArg(0));
7995 Value *Opc1 = EmitScalarExpr(E->getArg(1));
7996 Value *CRm = EmitScalarExpr(E->getArg(2));
7997 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
7998
7999 // Returns an unsigned 64-bit integer, represented
8000 // as two 32-bit integers.
8001
8002 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
8003 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
8004 Rt = Builder.CreateZExt(Rt, Int64Ty);
8005 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
8006
8007 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
8008 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
8009 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
8010
8011 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
8012 }
8013
8014 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
8015 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
8016 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
8017 getContext().getTypeSize(E->getType()) == 64) ||
8018 BuiltinID == clang::ARM::BI__ldrexd) {
8019 Function *F;
8020
8021 switch (BuiltinID) {
8022 default: llvm_unreachable("unexpected builtin");
8023 case clang::ARM::BI__builtin_arm_ldaex:
8024 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
8025 break;
8026 case clang::ARM::BI__builtin_arm_ldrexd:
8027 case clang::ARM::BI__builtin_arm_ldrex:
8028 case clang::ARM::BI__ldrexd:
8029 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
8030 break;
8031 }
8032
8033 Value *LdPtr = EmitScalarExpr(E->getArg(0));
8034 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
8035 "ldrexd");
8036
8037 Value *Val0 = Builder.CreateExtractValue(Val, 1);
8038 Value *Val1 = Builder.CreateExtractValue(Val, 0);
8039 Val0 = Builder.CreateZExt(Val0, Int64Ty);
8040 Val1 = Builder.CreateZExt(Val1, Int64Ty);
8041
8042 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
8043 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
8044 Val = Builder.CreateOr(Val, Val1);
8045 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
8046 }
8047
8048 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
8049 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
8050 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
8051
8052 QualType Ty = E->getType();
8053 llvm::Type *RealResTy = ConvertType(Ty);
8054 llvm::Type *IntTy =
8055 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
8056 llvm::Type *PtrTy = llvm::PointerType::getUnqual(getLLVMContext());
8057
8058 Function *F = CGM.getIntrinsic(
8059 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
8060 : Intrinsic::arm_ldrex,
8061 PtrTy);
8062 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
8063 Val->addParamAttr(
8064 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
8065
8066 if (RealResTy->isPointerTy())
8067 return Builder.CreateIntToPtr(Val, RealResTy);
8068 else {
8069 llvm::Type *IntResTy = llvm::IntegerType::get(
8070 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
8071 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
8072 RealResTy);
8073 }
8074 }
8075
8076 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
8077 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
8078 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
8079 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
8080 Function *F = CGM.getIntrinsic(
8081 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
8082 : Intrinsic::arm_strexd);
8083 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
8084
8085 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
8086 Value *Val = EmitScalarExpr(E->getArg(0));
8087 Builder.CreateStore(Val, Tmp);
8088
8089 Address LdPtr = Tmp.withElementType(STy);
8090 Val = Builder.CreateLoad(LdPtr);
8091
8092 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
8093 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
8094 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
8095 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
8096 }
8097
8098 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
8099 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
8100 Value *StoreVal = EmitScalarExpr(E->getArg(0));
8101 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
8102
8103 QualType Ty = E->getArg(0)->getType();
8104 llvm::Type *StoreTy =
8105 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
8106
8107 if (StoreVal->getType()->isPointerTy())
8108 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
8109 else {
8110 llvm::Type *IntTy = llvm::IntegerType::get(
8111 getLLVMContext(),
8112 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
8113 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
8114 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
8115 }
8116
8117 Function *F = CGM.getIntrinsic(
8118 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
8119 : Intrinsic::arm_strex,
8120 StoreAddr->getType());
8121
8122 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
8123 CI->addParamAttr(
8124 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
8125 return CI;
8126 }
8127
8128 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
8129 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
8130 return Builder.CreateCall(F);
8131 }
8132
8133 // CRC32
8134 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
8135 switch (BuiltinID) {
8136 case clang::ARM::BI__builtin_arm_crc32b:
8137 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
8138 case clang::ARM::BI__builtin_arm_crc32cb:
8139 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
8140 case clang::ARM::BI__builtin_arm_crc32h:
8141 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
8142 case clang::ARM::BI__builtin_arm_crc32ch:
8143 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
8144 case clang::ARM::BI__builtin_arm_crc32w:
8145 case clang::ARM::BI__builtin_arm_crc32d:
8146 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
8147 case clang::ARM::BI__builtin_arm_crc32cw:
8148 case clang::ARM::BI__builtin_arm_crc32cd:
8149 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
8150 }
8151
8152 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
8153 Value *Arg0 = EmitScalarExpr(E->getArg(0));
8154 Value *Arg1 = EmitScalarExpr(E->getArg(1));
8155
8156 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
8157 // intrinsics, hence we need different codegen for these cases.
8158 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
8159 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
8160 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
8161 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
8162 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
8163 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
8164
8165 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
8166 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
8167 return Builder.CreateCall(F, {Res, Arg1b});
8168 } else {
8169 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
8170
8171 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
8172 return Builder.CreateCall(F, {Arg0, Arg1});
8173 }
8174 }
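// E.g. __builtin_arm_crc32d(crc, x) is emitted as
//   crc32w(crc32w(crc, trunc(x)), trunc(x >> 32))
// matching the two-call lowering described above.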
8175
8176 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
8177 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8178 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
8179 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
8180 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
8181 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
8182
8183 SpecialRegisterAccessKind AccessKind = Write;
8184 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
8185 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8186 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
8187 AccessKind = VolatileRead;
8188
8189 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
8190 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
8191
8192 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8193 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
8194
8195 llvm::Type *ValueType;
8196 llvm::Type *RegisterType;
8197 if (IsPointerBuiltin) {
8198 ValueType = VoidPtrTy;
8199 RegisterType = Int32Ty;
8200 } else if (Is64Bit) {
8201 ValueType = RegisterType = Int64Ty;
8202 } else {
8203 ValueType = RegisterType = Int32Ty;
8204 }
8205
8206 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
8207 AccessKind);
8208 }
8209
8210 if (BuiltinID == ARM::BI__builtin_sponentry) {
8211 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
8212 return Builder.CreateCall(F);
8213 }
8214
8215 // Handle MSVC intrinsics before argument evaluation to prevent double
8216 // evaluation.
8217 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
8218 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
8219
8220 // Deal with MVE builtins
8221 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
8222 return Result;
8223 // Handle CDE builtins
8224 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
8225 return Result;
8226
8227 // Some intrinsics are equivalent - if they are, use the base intrinsic ID.
8228 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
8229 return P.first == BuiltinID;
8230 });
8231 if (It != end(NEONEquivalentIntrinsicMap))
8232 BuiltinID = It->second;
8233
8234 // Find out if any arguments are required to be integer constant
8235 // expressions.
8236 unsigned ICEArguments = 0;
8237 ASTContext::GetBuiltinTypeError Error;
8238 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
8239 assert(Error == ASTContext::GE_None && "Should not codegen an error");
8240
8241 auto getAlignmentValue32 = [&](Address addr) -> Value* {
8242 return Builder.getInt32(addr.getAlignment().getQuantity());
8243 };
8244
8245 Address PtrOp0 = Address::invalid();
8246 Address PtrOp1 = Address::invalid();
8247 SmallVector<Value*, 4> Ops;
8248 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
8249 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
8250 for (unsigned i = 0, e = NumArgs; i != e; i++) {
8251 if (i == 0) {
8252 switch (BuiltinID) {
8253 case NEON::BI__builtin_neon_vld1_v:
8254 case NEON::BI__builtin_neon_vld1q_v:
8255 case NEON::BI__builtin_neon_vld1q_lane_v:
8256 case NEON::BI__builtin_neon_vld1_lane_v:
8257 case NEON::BI__builtin_neon_vld1_dup_v:
8258 case NEON::BI__builtin_neon_vld1q_dup_v:
8259 case NEON::BI__builtin_neon_vst1_v:
8260 case NEON::BI__builtin_neon_vst1q_v:
8261 case NEON::BI__builtin_neon_vst1q_lane_v:
8262 case NEON::BI__builtin_neon_vst1_lane_v:
8263 case NEON::BI__builtin_neon_vst2_v:
8264 case NEON::BI__builtin_neon_vst2q_v:
8265 case NEON::BI__builtin_neon_vst2_lane_v:
8266 case NEON::BI__builtin_neon_vst2q_lane_v:
8267 case NEON::BI__builtin_neon_vst3_v:
8268 case NEON::BI__builtin_neon_vst3q_v:
8269 case NEON::BI__builtin_neon_vst3_lane_v:
8270 case NEON::BI__builtin_neon_vst3q_lane_v:
8271 case NEON::BI__builtin_neon_vst4_v:
8272 case NEON::BI__builtin_neon_vst4q_v:
8273 case NEON::BI__builtin_neon_vst4_lane_v:
8274 case NEON::BI__builtin_neon_vst4q_lane_v:
8275 // Get the alignment for the argument in addition to the value;
8276 // we'll use it later.
8277 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
8278 Ops.push_back(PtrOp0.getPointer());
8279 continue;
8280 }
8281 }
8282 if (i == 1) {
8283 switch (BuiltinID) {
8284 case NEON::BI__builtin_neon_vld2_v:
8285 case NEON::BI__builtin_neon_vld2q_v:
8286 case NEON::BI__builtin_neon_vld3_v:
8287 case NEON::BI__builtin_neon_vld3q_v:
8288 case NEON::BI__builtin_neon_vld4_v:
8289 case NEON::BI__builtin_neon_vld4q_v:
8290 case NEON::BI__builtin_neon_vld2_lane_v:
8291 case NEON::BI__builtin_neon_vld2q_lane_v:
8292 case NEON::BI__builtin_neon_vld3_lane_v:
8293 case NEON::BI__builtin_neon_vld3q_lane_v:
8294 case NEON::BI__builtin_neon_vld4_lane_v:
8295 case NEON::BI__builtin_neon_vld4q_lane_v:
8296 case NEON::BI__builtin_neon_vld2_dup_v:
8297 case NEON::BI__builtin_neon_vld2q_dup_v:
8298 case NEON::BI__builtin_neon_vld3_dup_v:
8299 case NEON::BI__builtin_neon_vld3q_dup_v:
8300 case NEON::BI__builtin_neon_vld4_dup_v:
8301 case NEON::BI__builtin_neon_vld4q_dup_v:
8302 // Get the alignment for the argument in addition to the value;
8303 // we'll use it later.
8304 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
8305 Ops.push_back(PtrOp1.getPointer());
8306 continue;
8307 }
8308 }
8309
8310 if ((ICEArguments & (1 << i)) == 0) {
8311 Ops.push_back(EmitScalarExpr(E->getArg(i)));
8312 } else {
8313 // If this is required to be a constant, constant fold it so that we know
8314 // that the generated intrinsic gets a ConstantInt.
8315 Ops.push_back(llvm::ConstantInt::get(
8316 getLLVMContext(),
8317 *E->getArg(i)->getIntegerConstantExpr(getContext())));
8318 }
8319 }
8320
8321 switch (BuiltinID) {
8322 default: break;
8323
8324 case NEON::BI__builtin_neon_vget_lane_i8:
8325 case NEON::BI__builtin_neon_vget_lane_i16:
8326 case NEON::BI__builtin_neon_vget_lane_i32:
8327 case NEON::BI__builtin_neon_vget_lane_i64:
8328 case NEON::BI__builtin_neon_vget_lane_bf16:
8329 case NEON::BI__builtin_neon_vget_lane_f32:
8330 case NEON::BI__builtin_neon_vgetq_lane_i8:
8331 case NEON::BI__builtin_neon_vgetq_lane_i16:
8332 case NEON::BI__builtin_neon_vgetq_lane_i32:
8333 case NEON::BI__builtin_neon_vgetq_lane_i64:
8334 case NEON::BI__builtin_neon_vgetq_lane_bf16:
8335 case NEON::BI__builtin_neon_vgetq_lane_f32:
8336 case NEON::BI__builtin_neon_vduph_lane_bf16:
8337 case NEON::BI__builtin_neon_vduph_laneq_bf16:
8338 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
8339
8340 case NEON::BI__builtin_neon_vrndns_f32: {
8341 Value *Arg = EmitScalarExpr(E->getArg(0));
8342 llvm::Type *Tys[] = {Arg->getType()};
8343 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
8344 return Builder.CreateCall(F, {Arg}, "vrndn"); }
8345
8346 case NEON::BI__builtin_neon_vset_lane_i8:
8347 case NEON::BI__builtin_neon_vset_lane_i16:
8348 case NEON::BI__builtin_neon_vset_lane_i32:
8349 case NEON::BI__builtin_neon_vset_lane_i64:
8350 case NEON::BI__builtin_neon_vset_lane_bf16:
8351 case NEON::BI__builtin_neon_vset_lane_f32:
8352 case NEON::BI__builtin_neon_vsetq_lane_i8:
8353 case NEON::BI__builtin_neon_vsetq_lane_i16:
8354 case NEON::BI__builtin_neon_vsetq_lane_i32:
8355 case NEON::BI__builtin_neon_vsetq_lane_i64:
8356 case NEON::BI__builtin_neon_vsetq_lane_bf16:
8357 case NEON::BI__builtin_neon_vsetq_lane_f32:
8358 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
8359
8360 case NEON::BI__builtin_neon_vsha1h_u32:
8361 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
8362 "vsha1h");
8363 case NEON::BI__builtin_neon_vsha1cq_u32:
8364 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
8365 "vsha1h");
8366 case NEON::BI__builtin_neon_vsha1pq_u32:
8367 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
8368 "vsha1h");
8369 case NEON::BI__builtin_neon_vsha1mq_u32:
8370 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
8371 "vsha1h");
8372
8373 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
8374 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
8375 "vcvtbfp2bf");
8376 }
8377
8378 // The ARM _MoveToCoprocessor builtins put the input register value as
8379 // the first argument, but the LLVM intrinsic expects it as the third one.
8380 case clang::ARM::BI_MoveToCoprocessor:
8381 case clang::ARM::BI_MoveToCoprocessor2: {
8382 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
8383 ? Intrinsic::arm_mcr
8384 : Intrinsic::arm_mcr2);
8385 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
8386 Ops[3], Ops[4], Ops[5]});
8387 }
8388 }
8389
8390 // Get the last argument, which specifies the vector type.
8391 assert(HasExtraArg);
8392 const Expr *Arg = E->getArg(E->getNumArgs()-1);
8393 std::optional<llvm::APSInt> Result =
8394 Arg->getIntegerConstantExpr(getContext());
8395 if (!Result)
8396 return nullptr;
8397
8398 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
8399 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
8400 // Determine the overloaded type of this builtin.
8401 llvm::Type *Ty;
8402 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
8403 Ty = FloatTy;
8404 else
8405 Ty = DoubleTy;
8406
8407 // Determine whether this is an unsigned conversion or not.
8408 bool usgn = Result->getZExtValue() == 1;
8409 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
8410
8411 // Call the appropriate intrinsic.
8412 Function *F = CGM.getIntrinsic(Int, Ty);
8413 return Builder.CreateCall(F, Ops, "vcvtr");
8414 }
8415
8416 // Determine the type of this overloaded NEON intrinsic.
8417 NeonTypeFlags Type = Result->getZExtValue();
8418 bool usgn = Type.isUnsigned();
8419 bool rightShift = false;
8420
8421 llvm::FixedVectorType *VTy =
8422 GetNeonType(this, Type, getTarget().hasLegalHalfType(), false,
8423 getTarget().hasBFloat16Type());
8424 llvm::Type *Ty = VTy;
8425 if (!Ty)
8426 return nullptr;
8427
8428 // Many NEON builtins have identical semantics and uses in ARM and
8429 // AArch64. Emit these in a single function.
8430 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
8431 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
8432 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
8433 if (Builtin)
8434 return EmitCommonNeonBuiltinExpr(
8435 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
8436 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
8437
8438 unsigned Int;
8439 switch (BuiltinID) {
8440 default: return nullptr;
8441 case NEON::BI__builtin_neon_vld1q_lane_v:
8442 // Handle 64-bit integer elements as a special case. Use shuffles of
8443 // one-element vectors to avoid poor code for i64 in the backend.
8444 if (VTy->getElementType()->isIntegerTy(64)) {
8445 // Extract the other lane.
8446 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8447 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
8448 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
8449 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
8450 // Load the value as a one-element vector.
8451 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
8452 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8453 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
8454 Value *Align = getAlignmentValue32(PtrOp0);
8455 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
8456 // Combine them.
8457 int Indices[] = {1 - Lane, Lane};
8458 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
8459 }
8460 [[fallthrough]];
8461 case NEON::BI__builtin_neon_vld1_lane_v: {
8462 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8463 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
8464 Value *Ld = Builder.CreateLoad(PtrOp0);
8465 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
8466 }
8467 case NEON::BI__builtin_neon_vqrshrn_n_v:
8468 Int =
8469 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
8470 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
8471 1, true);
8472 case NEON::BI__builtin_neon_vqrshrun_n_v:
8473 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
8474 Ops, "vqrshrun_n", 1, true);
8475 case NEON::BI__builtin_neon_vqshrn_n_v:
8476 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
8477 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
8478 1, true);
8479 case NEON::BI__builtin_neon_vqshrun_n_v:
8480 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
8481 Ops, "vqshrun_n", 1, true);
8482 case NEON::BI__builtin_neon_vrecpe_v:
8483 case NEON::BI__builtin_neon_vrecpeq_v:
8484 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
8485 Ops, "vrecpe");
8486 case NEON::BI__builtin_neon_vrshrn_n_v:
8487 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
8488 Ops, "vrshrn_n", 1, true);
8489 case NEON::BI__builtin_neon_vrsra_n_v:
8490 case NEON::BI__builtin_neon_vrsraq_n_v:
8491 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8492 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8493 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
8494 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
8495 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
8496 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
8497 case NEON::BI__builtin_neon_vsri_n_v:
8498 case NEON::BI__builtin_neon_vsriq_n_v:
8499 rightShift = true;
8500 [[fallthrough]];
8501 case NEON::BI__builtin_neon_vsli_n_v:
8502 case NEON::BI__builtin_neon_vsliq_n_v:
8503 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
8504 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
8505 Ops, "vsli_n");
8506 case NEON::BI__builtin_neon_vsra_n_v:
8507 case NEON::BI__builtin_neon_vsraq_n_v:
8508 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8509 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
8510 return Builder.CreateAdd(Ops[0], Ops[1]);
8511 case NEON::BI__builtin_neon_vst1q_lane_v:
8512 // Handle 64-bit integer elements as a special case. Use a shuffle to get
8513 // a one-element vector and avoid poor code for i64 in the backend.
8514 if (VTy->getElementType()->isIntegerTy(64)) {
8515 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8516 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
8517 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
8518 Ops[2] = getAlignmentValue32(PtrOp0);
8519 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
8520 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
8521 Tys), Ops);
8522 }
8523 [[fallthrough]];
8524 case NEON::BI__builtin_neon_vst1_lane_v: {
8525 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8526 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
8527 return Builder.CreateStore(Ops[1],
8528 PtrOp0.withElementType(Ops[1]->getType()));
8529 }
8530 case NEON::BI__builtin_neon_vtbl1_v:
8531 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
8532 Ops, "vtbl1");
8533 case NEON::BI__builtin_neon_vtbl2_v:
8534 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
8535 Ops, "vtbl2");
8536 case NEON::BI__builtin_neon_vtbl3_v:
8537 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
8538 Ops, "vtbl3");
8539 case NEON::BI__builtin_neon_vtbl4_v:
8540 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
8541 Ops, "vtbl4");
8542 case NEON::BI__builtin_neon_vtbx1_v:
8543 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
8544 Ops, "vtbx1");
8545 case NEON::BI__builtin_neon_vtbx2_v:
8546 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
8547 Ops, "vtbx2");
8548 case NEON::BI__builtin_neon_vtbx3_v:
8549 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
8550 Ops, "vtbx3");
8551 case NEON::BI__builtin_neon_vtbx4_v:
8552 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
8553 Ops, "vtbx4");
8554 }
8555}
8556
8557 template<typename Integer>
8558 static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
8559 return E->getIntegerConstantExpr(Context)->getExtValue();
8560}
8561
8562static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
8563 llvm::Type *T, bool Unsigned) {
8564 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
8565 // which finds it convenient to specify signed/unsigned as a boolean flag.
8566 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
8567}
8568
8569static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
8570 uint32_t Shift, bool Unsigned) {
8571 // MVE helper function for integer shift right. This must handle signed vs
8572 // unsigned, and also deal specially with the case where the shift count is
8573 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
8574 // undefined behavior, but in MVE it's legal, so we must convert it to code
8575 // that is not undefined in IR.
8576 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
8577 ->getElementType()
8578 ->getPrimitiveSizeInBits();
8579 if (Shift == LaneBits) {
8580 // An unsigned shift of the full lane size always generates zero, so we can
8581 // simply emit a zero vector. A signed shift of the full lane size does the
8582 // same thing as shifting by one bit fewer.
8583 if (Unsigned)
8584 return llvm::Constant::getNullValue(V->getType());
8585 else
8586 --Shift;
8587 }
8588 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
8589}
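// A standalone scalar model (illustrative, assuming 32-bit lanes and an
// arithmetic >> for signed values, as on mainstream compilers) of the rule
// implemented above: an unsigned shift by the full lane width folds to zero,
// and a signed one behaves like a shift by one bit fewer.
#include <cassert>
#include <cstdint>
static int32_t ModelLaneShr(int32_t V, uint32_t Shift, bool Unsigned) {
  const uint32_t LaneBits = 32;
  if (Shift == LaneBits) {
    if (Unsigned)
      return 0; // an i32 lshr by 32 is not defined in IR; MVE defines it as 0
    --Shift;    // ashr by 31 replicates the sign bit, matching MVE's "by 32"
  }
  return Unsigned ? (int32_t)((uint32_t)V >> Shift) : (V >> Shift);
}
int main() {
  assert(ModelLaneShr(-8, 32, /*Unsigned=*/true) == 0);
  assert(ModelLaneShr(-8, 32, /*Unsigned=*/false) == -1);
}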
8590
8591static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
8592 // MVE-specific helper function for a vector splat, which infers the element
8593 // count of the output vector by knowing that MVE vectors are all 128 bits
8594 // wide.
8595 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
8596 return Builder.CreateVectorSplat(Elements, V);
8597}
8598
8599static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
8600 CodeGenFunction *CGF,
8601 llvm::Value *V,
8602 llvm::Type *DestType) {
8603 // Convert one MVE vector type into another by reinterpreting its in-register
8604 // format.
8605 //
8606 // On little-endian targets, this is identical to a bitcast (which
8607 // reinterprets the memory format). On big-endian targets the two are not
8608 // necessarily the same, because the register and memory formats map to each
8609 // other differently depending on the lane size.
8610 //
8611 // We generate a bitcast whenever we can (if we're little-endian, or if the
8612 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
8613 // that performs the different kind of reinterpretation.
8614 if (CGF->getTarget().isBigEndian() &&
8615 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
8616 return Builder.CreateCall(
8617 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
8618 {DestType, V->getType()}),
8619 V);
8620 } else {
8621 return Builder.CreateBitCast(V, DestType);
8622 }
8623}
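// The policy above, restated as a standalone predicate (illustrative; the
// helper name is made up for this sketch): the vreinterpretq intrinsic is
// needed only on big-endian targets when the lane size actually changes.
#include <cassert>
static bool NeedsVReinterpretIntrinsic(bool IsBigEndian, unsigned SrcLaneBits,
                                       unsigned DstLaneBits) {
  return IsBigEndian && SrcLaneBits != DstLaneBits;
}
int main() {
  assert(!NeedsVReinterpretIntrinsic(false, 16, 32)); // little-endian: bitcast
  assert(!NeedsVReinterpretIntrinsic(true, 32, 32));  // same lane size: bitcast
  assert(NeedsVReinterpretIntrinsic(true, 16, 32));   // intrinsic required
}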
8624
8625static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
8626 // Make a shufflevector that extracts every other element of a vector (evens
8627 // or odds, as desired).
8628 SmallVector<int, 16> Indices;
8629 unsigned InputElements =
8630 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
8631 for (unsigned i = 0; i < InputElements; i += 2)
8632 Indices.push_back(i + Odd);
8633 return Builder.CreateShuffleVector(V, Indices);
8634}
8635
8636static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
8637 llvm::Value *V1) {
8638 // Make a shufflevector that interleaves two vectors element by element.
8639 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
8640 SmallVector<int, 16> Indices;
8641 unsigned InputElements =
8642 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
8643 for (unsigned i = 0; i < InputElements; i++) {
8644 Indices.push_back(i);
8645 Indices.push_back(i + InputElements);
8646 }
8647 return Builder.CreateShuffleVector(V0, V1, Indices);
8648}
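// A quick standalone trace (illustrative) of the two index patterns built
// above. For an 8-lane input, VectorUnzip keeps {0,2,4,6} (or {1,3,5,7} with
// Odd set); for two 4-lane inputs, VectorZip interleaves to {0,4,1,5,2,6,3,7}.
#include <cstdio>
#include <vector>
int main() {
  std::vector<int> Unzip, Zip;
  const unsigned UnzipInputElements = 8; // assumed: one 8-lane input, Odd = 0
  for (unsigned i = 0; i < UnzipInputElements; i += 2)
    Unzip.push_back(i);
  const unsigned ZipInputElements = 4;   // assumed: two 4-lane inputs
  for (unsigned i = 0; i < ZipInputElements; i++) {
    Zip.push_back(i);
    Zip.push_back(i + ZipInputElements);
  }
  for (int I : Unzip) printf("%d ", I); // prints: 0 2 4 6
  printf("| ");
  for (int I : Zip) printf("%d ", I);   // prints: 0 4 1 5 2 6 3 7
  printf("\n");
}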
8649
8650template<unsigned HighBit, unsigned OtherBits>
8651static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
8652 // MVE-specific helper function to make a vector splat of a constant such as
8653 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
8654 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
8655 unsigned LaneBits = T->getPrimitiveSizeInBits();
8656 uint32_t Value = HighBit << (LaneBits - 1);
8657 if (OtherBits)
8658 Value |= (1UL << (LaneBits - 1)) - 1;
8659 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
8660 return ARMMVEVectorSplat(Builder, Lane);
8661}
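// A worked example (illustrative) of the lane constant computed above, for
// 16-bit lanes: HighBit=1/OtherBits=0 gives 0x8000 (INT16_MIN),
// HighBit=0/OtherBits=1 gives 0x7FFF (INT16_MAX), and HighBit=1/OtherBits=1
// gives 0xFFFF (UINT16_MAX).
#include <cassert>
#include <cstdint>
static uint32_t LaneConstant(unsigned HighBit, unsigned OtherBits,
                             unsigned LaneBits) {
  uint32_t Value = HighBit << (LaneBits - 1);
  if (OtherBits)
    Value |= (1UL << (LaneBits - 1)) - 1;
  return Value;
}
int main() {
  assert(LaneConstant(1, 0, 16) == 0x8000u);
  assert(LaneConstant(0, 1, 16) == 0x7FFFu);
  assert(LaneConstant(1, 1, 16) == 0xFFFFu);
}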
8662
8663static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
8664 llvm::Value *V,
8665 unsigned ReverseWidth) {
8666 // MVE-specific helper function which reverses the elements of a
8667 // vector within every (ReverseWidth)-bit collection of lanes.
8668 SmallVector<int, 16> Indices;
8669 unsigned LaneSize = V->getType()->getScalarSizeInBits();
8670 unsigned Elements = 128 / LaneSize;
8671 unsigned Mask = ReverseWidth / LaneSize - 1;
8672 for (unsigned i = 0; i < Elements; i++)
8673 Indices.push_back(i ^ Mask);
8674 return Builder.CreateShuffleVector(V, Indices);
8675}
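// A standalone trace (illustrative) of the XOR-mask trick above: reversing
// 16-bit lanes within every 32-bit group of a 128-bit vector gives Mask = 1
// and the shuffle {1,0,3,2,5,4,7,6}, i.e. a vrev32.16-style permutation.
#include <cstdio>
int main() {
  const unsigned LaneSize = 16, ReverseWidth = 32; // assumed widths
  unsigned Elements = 128 / LaneSize;
  unsigned Mask = ReverseWidth / LaneSize - 1;
  for (unsigned i = 0; i < Elements; i++)
    printf("%u ", i ^ Mask); // prints: 1 0 3 2 5 4 7 6
  printf("\n");
}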
8676
8677 Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
8678 const CallExpr *E,
8679 ReturnValueSlot ReturnValue,
8680 llvm::Triple::ArchType Arch) {
8681 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
8682 Intrinsic::ID IRIntr;
8683 unsigned NumVectors;
8684
8685 // Code autogenerated by Tablegen will handle all the simple builtins.
8686 switch (BuiltinID) {
8687 #include "clang/Basic/arm_mve_builtin_cg.inc"
8688
8689 // If we didn't match an MVE builtin id at all, go back to the
8690 // main EmitARMBuiltinExpr.
8691 default:
8692 return nullptr;
8693 }
8694
8695 // Anything that breaks from that switch is an MVE builtin that
8696 // needs handwritten code to generate.
8697
8698 switch (CustomCodeGenType) {
8699
8700 case CustomCodeGen::VLD24: {
8701 llvm::SmallVector<Value *, 4> Ops;
8702 llvm::SmallVector<llvm::Type *, 4> Tys;
8703
8704 auto MvecCType = E->getType();
8705 auto MvecLType = ConvertType(MvecCType);
8706 assert(MvecLType->isStructTy() &&
8707 "Return type for vld[24]q should be a struct");
8708 assert(MvecLType->getStructNumElements() == 1 &&
8709 "Return-type struct for vld[24]q should have one element");
8710 auto MvecLTypeInner = MvecLType->getStructElementType(0);
8711 assert(MvecLTypeInner->isArrayTy() &&
8712 "Return-type struct for vld[24]q should contain an array");
8713 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
8714 "Array member of return-type struct vld[24]q has wrong length");
8715 auto VecLType = MvecLTypeInner->getArrayElementType();
8716
8717 Tys.push_back(VecLType);
8718
8719 auto Addr = E->getArg(0);
8720 Ops.push_back(EmitScalarExpr(Addr));
8721 Tys.push_back(ConvertType(Addr->getType()));
8722
8723 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
8724 Value *LoadResult = Builder.CreateCall(F, Ops);
8725 Value *MvecOut = PoisonValue::get(MvecLType);
8726 for (unsigned i = 0; i < NumVectors; ++i) {
8727 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
8728 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
8729 }
8730
8731 if (ReturnValue.isNull())
8732 return MvecOut;
8733 else
8734 return Builder.CreateStore(MvecOut, ReturnValue.getValue());
8735 }
8736
8737 case CustomCodeGen::VST24: {
8738 llvm::SmallVector<Value *, 4> Ops;
8739 llvm::SmallVector<llvm::Type *, 4> Tys;
8740
8741 auto Addr = E->getArg(0);
8742 Ops.push_back(EmitScalarExpr(Addr));
8743 Tys.push_back(ConvertType(Addr->getType()));
8744
8745 auto MvecCType = E->getArg(1)->getType();
8746 auto MvecLType = ConvertType(MvecCType);
8747 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
8748 assert(MvecLType->getStructNumElements() == 1 &&
8749 "Data-type struct for vst2q should have one element");
8750 auto MvecLTypeInner = MvecLType->getStructElementType(0);
8751 assert(MvecLTypeInner->isArrayTy() &&
8752 "Data-type struct for vst2q should contain an array");
8753 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
8754 "Array member of data-type struct for vst[24]q has wrong length");
8755 auto VecLType = MvecLTypeInner->getArrayElementType();
8756
8757 Tys.push_back(VecLType);
8758
8759 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
8760 EmitAggExpr(E->getArg(1), MvecSlot);
8761 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
8762 for (unsigned i = 0; i < NumVectors; i++)
8763 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
8764
8765 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
8766 Value *ToReturn = nullptr;
8767 for (unsigned i = 0; i < NumVectors; i++) {
8768 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
8769 ToReturn = Builder.CreateCall(F, Ops);
8770 Ops.pop_back();
8771 }
8772 return ToReturn;
8773 }
8774 }
8775 llvm_unreachable("unknown custom codegen type.");
8776}
8777
8778 Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
8779 const CallExpr *E,
8780 ReturnValueSlot ReturnValue,
8781 llvm::Triple::ArchType Arch) {
8782 switch (BuiltinID) {
8783 default:
8784 return nullptr;
8785#include "clang/Basic/arm_cde_builtin_cg.inc"
8786 }
8787}
8788
8789static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
8790 const CallExpr *E,
8791 SmallVectorImpl<Value *> &Ops,
8792 llvm::Triple::ArchType Arch) {
8793 unsigned int Int = 0;
8794 const char *s = nullptr;
8795
8796 switch (BuiltinID) {
8797 default:
8798 return nullptr;
8799 case NEON::BI__builtin_neon_vtbl1_v:
8800 case NEON::BI__builtin_neon_vqtbl1_v:
8801 case NEON::BI__builtin_neon_vqtbl1q_v:
8802 case NEON::BI__builtin_neon_vtbl2_v:
8803 case NEON::BI__builtin_neon_vqtbl2_v:
8804 case NEON::BI__builtin_neon_vqtbl2q_v:
8805 case NEON::BI__builtin_neon_vtbl3_v:
8806 case NEON::BI__builtin_neon_vqtbl3_v:
8807 case NEON::BI__builtin_neon_vqtbl3q_v:
8808 case NEON::BI__builtin_neon_vtbl4_v:
8809 case NEON::BI__builtin_neon_vqtbl4_v:
8810 case NEON::BI__builtin_neon_vqtbl4q_v:
8811 break;
8812 case NEON::BI__builtin_neon_vtbx1_v:
8813 case NEON::BI__builtin_neon_vqtbx1_v:
8814 case NEON::BI__builtin_neon_vqtbx1q_v:
8815 case NEON::BI__builtin_neon_vtbx2_v:
8816 case NEON::BI__builtin_neon_vqtbx2_v:
8817 case NEON::BI__builtin_neon_vqtbx2q_v:
8818 case NEON::BI__builtin_neon_vtbx3_v:
8819 case NEON::BI__builtin_neon_vqtbx3_v:
8820 case NEON::BI__builtin_neon_vqtbx3q_v:
8821 case NEON::BI__builtin_neon_vtbx4_v:
8822 case NEON::BI__builtin_neon_vqtbx4_v:
8823 case NEON::BI__builtin_neon_vqtbx4q_v:
8824 break;
8825 }
8826
8827 assert(E->getNumArgs() >= 3);
8828
8829 // Get the last argument, which specifies the vector type.
8830 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
8831 std::optional<llvm::APSInt> Result =
8832 Arg->getIntegerConstantExpr(CGF.getContext());
8833 if (!Result)
8834 return nullptr;
8835
8836 // Determine the type of this overloaded NEON intrinsic.
8837 NeonTypeFlags Type = Result->getZExtValue();
8838 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
8839 if (!Ty)
8840 return nullptr;
8841
8842 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8843
8844 // AArch64 scalar builtins are not overloaded; they do not have an extra
8845 // argument that specifies the vector type, so we need to handle each case.
8846 switch (BuiltinID) {
8847 case NEON::BI__builtin_neon_vtbl1_v: {
8848 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
8849 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
8850 }
8851 case NEON::BI__builtin_neon_vtbl2_v: {
8852 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
8853 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
8854 }
8855 case NEON::BI__builtin_neon_vtbl3_v: {
8856 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
8857 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
8858 }
8859 case NEON::BI__builtin_neon_vtbl4_v: {
8860 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
8861 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
8862 }
8863 case NEON::BI__builtin_neon_vtbx1_v: {
8864 Value *TblRes =
8865 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
8866 Intrinsic::aarch64_neon_tbl1, "vtbl1");
8867
8868 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
8869 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
8870 CmpRes = Builder.CreateSExt(CmpRes, Ty);
8871
8872 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
8873 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
8874 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
8875 }
8876 case NEON::BI__builtin_neon_vtbx2_v: {
8877 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
8878 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
8879 }
8880 case NEON::BI__builtin_neon_vtbx3_v: {
8881 Value *TblRes =
8882 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
8883 Intrinsic::aarch64_neon_tbl2, "vtbl2");
8884
8885 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
8886 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
8887 TwentyFourV);
8888 CmpRes = Builder.CreateSExt(CmpRes, Ty);
8889
8890 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
8891 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
8892 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
8893 }
8894 case NEON::BI__builtin_neon_vtbx4_v: {
8895 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
8896 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
8897 }
8898 case NEON::BI__builtin_neon_vqtbl1_v:
8899 case NEON::BI__builtin_neon_vqtbl1q_v:
8900 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
8901 case NEON::BI__builtin_neon_vqtbl2_v:
8902 case NEON::BI__builtin_neon_vqtbl2q_v:
8903 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
8904 case NEON::BI__builtin_neon_vqtbl3_v:
8905 case NEON::BI__builtin_neon_vqtbl3q_v:
8906 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
8907 case NEON::BI__builtin_neon_vqtbl4_v:
8908 case NEON::BI__builtin_neon_vqtbl4q_v:
8909 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
8910 case NEON::BI__builtin_neon_vqtbx1_v:
8911 case NEON::BI__builtin_neon_vqtbx1q_v:
8912 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
8913 case NEON::BI__builtin_neon_vqtbx2_v:
8914 case NEON::BI__builtin_neon_vqtbx2q_v:
8915 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
8916 case NEON::BI__builtin_neon_vqtbx3_v:
8917 case NEON::BI__builtin_neon_vqtbx3q_v:
8918 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
8919 case NEON::BI__builtin_neon_vqtbx4_v:
8920 case NEON::BI__builtin_neon_vqtbx4q_v:
8921 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
8922 }
8924
8925 if (!Int)
8926 return nullptr;
8927
8928 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
8929 return CGF.EmitNeonCall(F, Ops, s);
8930}
8931
8932 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
8933 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
8934 Op = Builder.CreateBitCast(Op, Int16Ty);
8935 Value *V = PoisonValue::get(VTy);
8936 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
8937 Op = Builder.CreateInsertElement(V, Op, CI);
8938 return Op;
8939}
8940
8941/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
8942/// access builtin. Only required if it can't be inferred from the base pointer
8943/// operand.
8944llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
8945 switch (TypeFlags.getMemEltType()) {
8946 case SVETypeFlags::MemEltTyDefault:
8947 return getEltType(TypeFlags);
8948 case SVETypeFlags::MemEltTyInt8:
8949 return Builder.getInt8Ty();
8950 case SVETypeFlags::MemEltTyInt16:
8951 return Builder.getInt16Ty();
8952 case SVETypeFlags::MemEltTyInt32:
8953 return Builder.getInt32Ty();
8954 case SVETypeFlags::MemEltTyInt64:
8955 return Builder.getInt64Ty();
8956 }
8957 llvm_unreachable("Unknown MemEltType");
8958}
8959
8960llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
8961 switch (TypeFlags.getEltType()) {
8962 default:
8963 llvm_unreachable("Invalid SVETypeFlag!");
8964
8965 case SVETypeFlags::EltTyInt8:
8966 return Builder.getInt8Ty();
8967 case SVETypeFlags::EltTyInt16:
8968 return Builder.getInt16Ty();
8969 case SVETypeFlags::EltTyInt32:
8970 return Builder.getInt32Ty();
8971 case SVETypeFlags::EltTyInt64:
8972 return Builder.getInt64Ty();
8973 case SVETypeFlags::EltTyInt128:
8974 return Builder.getInt128Ty();
8975
8976 case SVETypeFlags::EltTyFloat16:
8977 return Builder.getHalfTy();
8978 case SVETypeFlags::EltTyFloat32:
8979 return Builder.getFloatTy();
8980 case SVETypeFlags::EltTyFloat64:
8981 return Builder.getDoubleTy();
8982
8983 case SVETypeFlags::EltTyBFloat16:
8984 return Builder.getBFloatTy();
8985
8986 case SVETypeFlags::EltTyBool8:
8987 case SVETypeFlags::EltTyBool16:
8988 case SVETypeFlags::EltTyBool32:
8989 case SVETypeFlags::EltTyBool64:
8990 return Builder.getInt1Ty();
8991 }
8992}
8993
8994// Return the llvm predicate vector type corresponding to the specified element
8995// TypeFlags.
8996 llvm::ScalableVectorType *
8997 CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
8998 switch (TypeFlags.getEltType()) {
8999 default: llvm_unreachable("Unhandled SVETypeFlag!");
9000
9001 case SVETypeFlags::EltTyInt8:
9002 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9003 case SVETypeFlags::EltTyInt16:
9004 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9005 case SVETypeFlags::EltTyInt32:
9006 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9007 case SVETypeFlags::EltTyInt64:
9008 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9009
9010 case SVETypeFlags::EltTyBFloat16:
9011 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9012 case SVETypeFlags::EltTyFloat16:
9013 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9014 case SVETypeFlags::EltTyFloat32:
9015 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9016 case SVETypeFlags::EltTyFloat64:
9017 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9018
9019 case SVETypeFlags::EltTyBool8:
9020 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9021 case SVETypeFlags::EltTyBool16:
9022 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9023 case SVETypeFlags::EltTyBool32:
9024 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9025 case SVETypeFlags::EltTyBool64:
9026 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9027 }
9028}
9029
9030// Return the llvm vector type corresponding to the specified element TypeFlags.
9031llvm::ScalableVectorType *
9032CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
9033 switch (TypeFlags.getEltType()) {
9034 default:
9035 llvm_unreachable("Invalid SVETypeFlag!");
9036
9037 case SVETypeFlags::EltTyInt8:
9038 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
9039 case SVETypeFlags::EltTyInt16:
9040 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
9041 case SVETypeFlags::EltTyInt32:
9042 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
9043 case SVETypeFlags::EltTyInt64:
9044 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
9045
9046 case SVETypeFlags::EltTyFloat16:
9047 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
9048 case SVETypeFlags::EltTyBFloat16:
9049 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
9050 case SVETypeFlags::EltTyFloat32:
9051 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
9052 case SVETypeFlags::EltTyFloat64:
9053 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
9054
9055 case SVETypeFlags::EltTyBool8:
9056 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9057 case SVETypeFlags::EltTyBool16:
9058 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9059 case SVETypeFlags::EltTyBool32:
9060 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9061 case SVETypeFlags::EltTyBool64:
9062 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9063 }
9064}
9065
9066 llvm::Value *
9067 CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
9068 Function *Ptrue =
9069 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
9070 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
9071}
9072
9073constexpr unsigned SVEBitsPerBlock = 128;
9074
9075static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
9076 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
9077 return llvm::ScalableVectorType::get(EltTy, NumElts);
9078}
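// A quick sanity check (illustrative) of the lane-count arithmetic above:
// with a 128-bit granule, i8 maps to <vscale x 16 x i8>, i16 to
// <vscale x 8 x i16>, and a 64-bit element to two lanes per granule.
#include <cassert>
int main() {
  const unsigned BitsPerBlock = 128; // mirrors SVEBitsPerBlock above
  assert(BitsPerBlock / 8 == 16);
  assert(BitsPerBlock / 16 == 8);
  assert(BitsPerBlock / 64 == 2);
}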
9079
9080// Reinterpret the input predicate so that it can be used to correctly isolate
9081 // the elements of the specified datatype.
9082 Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
9083 llvm::ScalableVectorType *VTy) {
9084 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
9085 if (Pred->getType() == RTy)
9086 return Pred;
9087
9088 unsigned IntID;
9089 llvm::Type *IntrinsicTy;
9090 switch (VTy->getMinNumElements()) {
9091 default:
9092 llvm_unreachable("unsupported element count!");
9093 case 1:
9094 case 2:
9095 case 4:
9096 case 8:
9097 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
9098 IntrinsicTy = RTy;
9099 break;
9100 case 16:
9101 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
9102 IntrinsicTy = Pred->getType();
9103 break;
9104 }
9105
9106 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
9107 Value *C = Builder.CreateCall(F, Pred);
9108 assert(C->getType() == RTy && "Unexpected return type!");
9109 return C;
9110}
9111
9112 Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
9113 SmallVectorImpl<Value *> &Ops,
9114 unsigned IntID) {
9115 auto *ResultTy = getSVEType(TypeFlags);
9116 auto *OverloadedTy =
9117 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
9118
9119 // At the ACLE level there's only one predicate type, svbool_t, which is
9120 // mapped to <n x 16 x i1>. However, this might be incompatible with the
9121 // actual type being loaded. For example, when loading doubles (i64) the
9122 // predicate should be <n x 2 x i1> instead. At the IR level the type of
9123 // the predicate and the data being loaded must match. Cast accordingly.
9124 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
9125
9126 Function *F = nullptr;
9127 if (Ops[1]->getType()->isVectorTy())
9128 // This is the "vector base, scalar offset" case. In order to uniquely
9129 // map this built-in to an LLVM IR intrinsic, we need both the return type
9130 // and the type of the vector base.
9131 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
9132 else
9133 // This is the "scalar base, vector offset case". The type of the offset
9134 // is encoded in the name of the intrinsic. We only need to specify the
9135 // return type in order to uniquely map this built-in to an LLVM IR
9136 // intrinsic.
9137 F = CGM.getIntrinsic(IntID, OverloadedTy);
9138
9139 // Pass 0 when the offset is missing. This can only be applied when using
9140 // the "vector base" addressing mode for which ACLE allows no offset. The
9141 // corresponding LLVM IR always requires an offset.
9142 if (Ops.size() == 2) {
9143 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
9144 Ops.push_back(ConstantInt::get(Int64Ty, 0));
9145 }
9146
9147 // For "vector base, scalar index" scale the index so that it becomes a
9148 // scalar offset.
9149 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
9150 unsigned BytesPerElt =
9151 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
9152 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
9153 }
9154
9155 Value *Call = Builder.CreateCall(F, Ops);
9156
9157 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
9158 // other cases it's folded into a nop.
9159 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
9160 : Builder.CreateSExt(Call, ResultTy);
9161}
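// A standalone arithmetic check (illustrative) of the index scaling above:
// for 64-bit elements BytesPerElt is 8, so index 3 becomes byte offset
// 3 << Log2_32(8) == 24; for 16-bit elements, index 5 becomes 10.
#include <cassert>
static unsigned ScaleIndexToOffset(unsigned Index, unsigned EltBits) {
  unsigned BytesPerElt = EltBits / 8;
  unsigned Log2 = 0;
  while ((1u << Log2) < BytesPerElt)
    ++Log2; // equivalent to llvm::Log2_32 for powers of two
  return Index << Log2;
}
int main() {
  assert(ScaleIndexToOffset(3, 64) == 24);
  assert(ScaleIndexToOffset(5, 16) == 10);
}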
9162
9163 Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
9164 SmallVectorImpl<Value *> &Ops,
9165 unsigned IntID) {
9166 auto *SrcDataTy = getSVEType(TypeFlags);
9167 auto *OverloadedTy =
9168 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
9169
9170 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
9171 // it's the first argument. Move it accordingly.
9172 Ops.insert(Ops.begin(), Ops.pop_back_val());
9173
9174 Function *F = nullptr;
9175 if (Ops[2]->getType()->isVectorTy())
9176 // This is the "vector base, scalar offset" case. In order to uniquely
9177 // map this built-in to an LLVM IR intrinsic, we need both the return type
9178 // and the type of the vector base.
9179 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
9180 else
9181 // This is the "scalar base, vector offset case". The type of the offset
9182 // is encoded in the name of the intrinsic. We only need to specify the
9183 // return type in order to uniquely map this built-in to an LLVM IR
9184 // intrinsic.
9185 F = CGM.getIntrinsic(IntID, OverloadedTy);
9186
9187 // Pass 0 when the offset is missing. This can only be applied when using
9188 // the "vector base" addressing mode for which ACLE allows no offset. The
9189 // corresponding LLVM IR always requires an offset.
9190 if (Ops.size() == 3) {
9191 assert(Ops[2]->getType()->isVectorTy() && "Scalar base requires an offset");
9192 Ops.push_back(ConstantInt::get(Int64Ty, 0));
9193 }
9194
9195 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
9196 // folded into a nop.
9197 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
9198
9199 // At the ACLE level there's only one predicate type, svbool_t, which is
9200 // mapped to <n x 16 x i1>. However, this might be incompatible with the
9201 // actual type being stored. For example, when storing doubles (i64) the
9202 // predicate should be <n x 2 x i1> instead. At the IR level the type of
9203 // the predicate and the data being stored must match. Cast accordingly.
9204 Ops[1] = EmitSVEPredicateCast(Ops[1], OverloadedTy);
9205
9206 // For "vector base, scalar index" scale the index so that it becomes a
9207 // scalar offset.
9208 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
9209 unsigned BytesPerElt =
9210 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
9211 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
9212 }
9213
9214 return Builder.CreateCall(F, Ops);
9215}
9216
9217 Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
9218 SmallVectorImpl<Value *> &Ops,
9219 unsigned IntID) {
9220 // The gather prefetches are overloaded on the vector input - this can either
9221 // be the vector of base addresses or vector of offsets.
9222 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
9223 if (!OverloadedTy)
9224 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
9225
9226 // Cast the predicate from svbool_t to the right number of elements.
9227 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
9228
9229 // vector + imm addressing modes
9230 if (Ops[1]->getType()->isVectorTy()) {
9231 if (Ops.size() == 3) {
9232 // Pass 0 for 'vector+imm' when the index is omitted.
9233 Ops.push_back(ConstantInt::get(Int64Ty, 0));
9234
9235 // The sv_prfop is the last operand in the builtin and IR intrinsic.
9236 std::swap(Ops[2], Ops[3]);
9237 } else {
9238 // Index needs to be passed as scaled offset.
9239 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
9240 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
9241 if (BytesPerElt > 1)
9242 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
9243 }
9244 }
9245
9246 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
9247 return Builder.CreateCall(F, Ops);
9248}
9249
9250 Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
9251 SmallVectorImpl<Value*> &Ops,
9252 unsigned IntID) {
9253 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
9254 auto VecPtrTy = llvm::PointerType::getUnqual(VTy);
9255 auto EltPtrTy = llvm::PointerType::getUnqual(VTy->getElementType());
9256
9257 unsigned N;
9258 switch (IntID) {
9259 case Intrinsic::aarch64_sve_ld2_sret:
9260 N = 2;
9261 break;
9262 case Intrinsic::aarch64_sve_ld3_sret:
9263 N = 3;
9264 break;
9265 case Intrinsic::aarch64_sve_ld4_sret:
9266 N = 4;
9267 break;
9268 default:
9269 llvm_unreachable("unknown intrinsic!");
9270 }
9271 auto RetTy = llvm::VectorType::get(VTy->getElementType(),
9272 VTy->getElementCount() * N);
9273
9274 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
9275 Value *BasePtr = Builder.CreateBitCast(Ops[1], VecPtrTy);
9276
9277 // Does the load have an offset?
9278 if (Ops.size() > 2)
9279 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
9280
9281 BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy);
9282 Function *F = CGM.getIntrinsic(IntID, {VTy});
9283 Value *Call = Builder.CreateCall(F, {Predicate, BasePtr});
9284 unsigned MinElts = VTy->getMinNumElements();
9285 Value *Ret = llvm::PoisonValue::get(RetTy);
9286 for (unsigned I = 0; I < N; I++) {
9287 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
9288 Value *SRet = Builder.CreateExtractValue(Call, I);
9289 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
9290 }
9291 return Ret;
9292}
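// A worked example (illustrative) of the insertion offsets computed by the
// loop above: for an svld2 of 8-bit data, VTy is <vscale x 16 x i8>, so part
// 0 is inserted at lane 0 and part 1 at lane 16 of the <vscale x 32 x i8>
// result.
#include <cstdio>
int main() {
  const unsigned MinElts = 16, N = 2; // assumed: ld2 of 8-bit elements
  for (unsigned I = 0; I < N; I++)
    printf("part %u -> starting lane %u\n", I, I * MinElts);
}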
9293
9294 Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
9295 SmallVectorImpl<Value*> &Ops,
9296 unsigned IntID) {
9297 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
9298 auto VecPtrTy = llvm::PointerType::getUnqual(VTy);
9299 auto EltPtrTy = llvm::PointerType::getUnqual(VTy->getElementType());
9300
9301 unsigned N;
9302 switch (IntID) {
9303 case Intrinsic::aarch64_sve_st2:
9304 N = 2;
9305 break;
9306 case Intrinsic::aarch64_sve_st3:
9307 N = 3;
9308 break;
9309 case Intrinsic::aarch64_sve_st4:
9310 N = 4;
9311 break;
9312 default:
9313 llvm_unreachable("unknown intrinsic!");
9314 }
9315
9316 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
9317 Value *BasePtr = Builder.CreateBitCast(Ops[1], VecPtrTy);
9318
9319 // Does the store have an offset?
9320 if (Ops.size() > 3)
9321 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
9322
9323 BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy);
9324 Value *Val = Ops.back();
9325
9326 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
9327 // need to break up the tuple vector.
9328 SmallVector<llvm::Value*, 5> Operands;
9329 unsigned MinElts = VTy->getElementCount().getKnownMinValue();
9330 for (unsigned I = 0; I < N; ++I) {
9331 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
9332 Operands.push_back(Builder.CreateExtractVector(VTy, Val, Idx));
9333 }
9334 Operands.append({Predicate, BasePtr});
9335
9336 Function *F = CGM.getIntrinsic(IntID, { VTy });
9337 return Builder.CreateCall(F, Operands);
9338}
9339
9340// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
9341// svpmullt_pair intrinsics, with the exception that their results are bitcast
9342 // to a wider type.
9343 Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
9344 SmallVectorImpl<Value *> &Ops,
9345 unsigned BuiltinID) {
9346 // Splat scalar operand to vector (intrinsics with _n infix)
9347 if (TypeFlags.hasSplatOperand()) {
9348 unsigned OpNo = TypeFlags.getSplatOperand();
9349 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
9350 }
9351
9352 // The pair-wise function has a narrower overloaded type.
9353 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
9354 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
9355
9356 // Now bitcast to the wider result type.
9357 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
9358 return EmitSVEReinterpret(Call, Ty);
9359}
9360
9361 Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
9362 ArrayRef<Value *> Ops, unsigned BuiltinID) {
9363 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
9364 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
9365 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
9366}
9367
9368 Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
9369 SmallVectorImpl<Value *> &Ops,
9370 unsigned BuiltinID) {
9371 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
9372 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
9373 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
9374
9375 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
9376 Value *BasePtr = Ops[1];
9377
9378 // Implement the index operand if not omitted.
9379 if (Ops.size() > 3)
9380 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
9381
9382 Value *PrfOp = Ops.back();
9383
9384 Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
9385 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
9386}
9387
9388 Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
9389 llvm::Type *ReturnTy,
9390 SmallVectorImpl<Value *> &Ops,
9391 unsigned BuiltinID,
9392 bool IsZExtReturn) {
9393 QualType LangPTy = E->getArg(1)->getType();
9394 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
9395 LangPTy->castAs<PointerType>()->getPointeeType());
9396
9397 // The vector type that is returned may be different from the
9398 // eventual type loaded from memory.
9399 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
9400 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
9401
9402 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
9403 Value *BasePtr = Ops[1];
9404
9405 // Does the load have an offset?
9406 if (Ops.size() > 2)
9407 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
9408
9409 Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
9410 auto *Load =
9411 cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
9412 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
9413 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
9414
9415 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
9416 : Builder.CreateSExt(Load, VectorTy);
9417}
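// A scalar model (illustrative) of the widening-load behaviour above: a
// sign-extending load (e.g. svld1sb into 32-bit lanes) sign-extends each
// memory element, while the unsigned variant zero-extends it.
#include <cassert>
#include <cstdint>
static int32_t WidenLoadedLane(int8_t MemLane, bool IsZExtReturn) {
  return IsZExtReturn ? (int32_t)(uint8_t)MemLane // zext path
                      : (int32_t)MemLane;         // sext path
}
int main() {
  assert(WidenLoadedLane(-1, /*IsZExtReturn=*/true) == 255);
  assert(WidenLoadedLane(-1, /*IsZExtReturn=*/false) == -1);
}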
9418
9419 Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
9420 SmallVectorImpl<Value *> &Ops,
9421 unsigned BuiltinID) {
9422 QualType LangPTy = E->getArg(1)->getType();
9423 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
9424 LangPTy->castAs<PointerType>()->getPointeeType());
9425
9426 // The vector type that is stored may be different from the
9427 // eventual type stored to memory.
9428 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
9429 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
9430
9431 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
9432 Value *BasePtr = Ops[1];
9433
9434 // Does the store have an offset?
9435 if (Ops.size() == 4)
9436 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
9437
9438 // Last value is always the data
9439 llvm::Value *Val = Builder.CreateTrunc(Ops.back(), MemoryTy);
9440
9441 Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
9442 auto *Store =
9443 cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
9444 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
9445 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
9446 return Store;
9447}
9448
9449 Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) {
9450 llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int32Ty, false);
9451 return Builder.CreateAdd(Base, CastOffset, "tileslice");
9452}
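// A one-line scalar model (illustrative) of the computation above: the slice
// offset is zero-extended (cast as unsigned) to 32 bits and added to the
// base slice index.
#include <cassert>
#include <cstdint>
static uint32_t TileSliceModel(uint32_t Base, uint64_t Offset) {
  return Base + (uint32_t)Offset; // IntCast(Offset, i32, /*isSigned=*/false); add
}
int main() { assert(TileSliceModel(4, 3) == 7); }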
9453
9454 Value *CodeGenFunction::EmitSMELd1St1(SVETypeFlags TypeFlags,
9455 SmallVectorImpl<Value *> &Ops,
9456 unsigned IntID) {
9457 Ops[3] = EmitSVEPredicateCast(
9458 Ops[3], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
9459
9460 SmallVector<Value *> NewOps;
9461 NewOps.push_back(Ops[3]);
9462
9463 llvm::Value *BasePtr = Ops[4];
9464
9465 // If the intrinsic contains the vnum parameter, multiply it by the vector
9466 // size in bytes.
9467 if (Ops.size() == 6) {
9468 Function *StreamingVectorLength =
9469 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
9470 llvm::Value *StreamingVectorLengthCall =
9471 Builder.CreateCall(StreamingVectorLength);
9472 llvm::Value *Mulvl =
9473 Builder.CreateMul(StreamingVectorLengthCall, Ops[5], "mulvl");
9474 // The type of the ptr parameter is void *, so use Int8Ty here.
9475 BasePtr = Builder.CreateGEP(Int8Ty, Ops[4], Mulvl);
9476 }
9477 NewOps.push_back(BasePtr);
9478 NewOps.push_back(Ops[0]);
9479 NewOps.push_back(EmitTileslice(Ops[2], Ops[1]));
9480 Function *F = CGM.getIntrinsic(IntID);
9481 return Builder.CreateCall(F, NewOps);
9482}
9483
9484 Value *CodeGenFunction::EmitSMEReadWrite(SVETypeFlags TypeFlags,
9485 SmallVectorImpl<Value *> &Ops,
9486 unsigned IntID) {
9487 auto *VecTy = getSVEType(TypeFlags);
9488 Function *F = CGM.getIntrinsic(IntID, VecTy);
9489 if (TypeFlags.isReadZA()) {
9490 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
9491 Ops[3] = EmitTileslice(Ops[4], Ops[3]);
9492 Ops.erase(&Ops[4]);
9493 } else if (TypeFlags.isWriteZA()) {
9494 Ops[1] = EmitTileslice(Ops[2], Ops[1]);
9495 Ops[2] = EmitSVEPredicateCast(Ops[3], VecTy);
9496 Ops.erase(&Ops[3]);
9497 }
9498 return Builder.CreateCall(F, Ops);
9499}
9500
9501 Value *CodeGenFunction::EmitSMEZero(SVETypeFlags TypeFlags,
9502 SmallVectorImpl<Value *> &Ops,
9503 unsigned IntID) {
9504 // The svzero_za() intrinsic zeroes the entire ZA tile and has no parameters.
9505 if (Ops.size() == 0)
9506 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
9507 Function *F = CGM.getIntrinsic(IntID, {});
9508 return Builder.CreateCall(F, Ops);
9509}
9510
9511 Value *CodeGenFunction::EmitSMELdrStr(SVETypeFlags TypeFlags,
9512 SmallVectorImpl<Value *> &Ops,
9513 unsigned IntID) {
9514 Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
9515 llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
9516 llvm::Value *MulVL = Builder.CreateMul(
9517 CntsbCall,
9518 Builder.getInt64(cast<llvm::ConstantInt>(Ops[1])->getZExtValue()),
9519 "mulvl");
9520 Ops[2] = Builder.CreateGEP(Int8Ty, Ops[2], MulVL);
9521 Ops[0] = EmitTileslice(Ops[1], Ops[0]);
9522 Ops.erase(&Ops[1]);
9523 Function *F = CGM.getIntrinsic(IntID, {});
9524 return Builder.CreateCall(F, Ops);
9525}
9526
9527// Limit the usage of scalable llvm IR generated by the ACLE by using the
9528// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
9529Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
9530 return Builder.CreateVectorSplat(
9531 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
9532}
9533
9534 Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
9535 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
9536}
9537
9538Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
9539 // FIXME: For big endian this needs an additional REV, or needs a separate
9540 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
9541 // instruction is defined as 'bitwise' equivalent from memory point of
9542 // view (when storing/reloading), whereas the svreinterpret builtin
9543 // implements bitwise equivalent cast from register point of view.
9544 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
9545 return Builder.CreateBitCast(Val, Ty);
9546}
9547
9548 static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
9549 SmallVectorImpl<Value *> &Ops) {
9550 auto *SplatZero = Constant::getNullValue(Ty);
9551 Ops.insert(Ops.begin(), SplatZero);
9552}
9553
9554 static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
9555 SmallVectorImpl<Value *> &Ops) {
9556 auto *SplatUndef = UndefValue::get(Ty);
9557 Ops.insert(Ops.begin(), SplatUndef);
9558}
9559
9560 SmallVector<llvm::Type *, 2>
9561 CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
9562 llvm::Type *ResultType,
9563 ArrayRef<Value *> Ops) {
9564 if (TypeFlags.isOverloadNone())
9565 return {};
9566
9567 llvm::Type *DefaultType = getSVEType(TypeFlags);
9568
9569 if (TypeFlags.isOverloadWhile())
9570 return {DefaultType, Ops[1]->getType()};
9571
9572 if (TypeFlags.isOverloadWhileRW())
9573 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
9574
9575 if (TypeFlags.isOverloadCvt())
9576 return {Ops[0]->getType(), Ops.back()->getType()};
9577
9578 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
9579 return {DefaultType};
9580}
9581
9582 Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
9583 llvm::Type *Ty,
9584 ArrayRef<Value *> Ops) {
9585 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
9586 "Expects TypeFlags.isTupleSet() or TypeFlags.isTupleGet()");
9587
9588 unsigned I = cast<ConstantInt>(Ops[1])->getSExtValue();
9589 auto *SingleVecTy = dyn_cast<llvm::ScalableVectorType>(
9590 TypeFlags.isTupleSet() ? Ops[2]->getType() : Ty);
9591 Value *Idx = ConstantInt::get(CGM.Int64Ty,
9592 I * SingleVecTy->getMinNumElements());
9593
9594 if (TypeFlags.isTupleSet())
9595 return Builder.CreateInsertVector(Ty, Ops[0], Ops[2], Idx);
9596 return Builder.CreateExtractVector(Ty, Ops[0], Idx);
9597}
9598
9599 Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
9600 llvm::Type *Ty,
9601 ArrayRef<Value *> Ops) {
9602 assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
9603
9604 auto *SrcTy = dyn_cast<llvm::ScalableVectorType>(Ops[0]->getType());
9605 unsigned MinElts = SrcTy->getMinNumElements();
9606 Value *Call = llvm::PoisonValue::get(Ty);
9607 for (unsigned I = 0; I < Ops.size(); I++) {
9608 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
9609 Call = Builder.CreateInsertVector(Ty, Call, Ops[I], Idx);
9610 }
9611
9612 return Call;
9613}
9614
9615 Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
9616 const CallExpr *E) {
9617 // Find out if any arguments are required to be integer constant expressions.
9618 unsigned ICEArguments = 0;
9619 ASTContext::GetBuiltinTypeError Error;
9620 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
9621 assert(Error == ASTContext::GE_None && "Should not codegen an error");
9622
9623 llvm::Type *Ty = ConvertType(E->getType());
9624 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
9625 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64) {
9626 Value *Val = EmitScalarExpr(E->getArg(0));
9627 return EmitSVEReinterpret(Val, Ty);
9628 }
9629
9630 llvm::SmallVector<Value *, 4> Ops;
9631 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
9632 if ((ICEArguments & (1 << i)) == 0)
9633 Ops.push_back(EmitScalarExpr(E->getArg(i)));
9634 else {
9635 // If this is required to be a constant, constant fold it so that we know
9636 // that the generated intrinsic gets a ConstantInt.
9637 std::optional<llvm::APSInt> Result =
9638 E->getArg(i)->getIntegerConstantExpr(getContext());
9639 assert(Result && "Expected argument to be a constant");
9640
9641 // Immediates for SVE llvm intrinsics are always 32-bit. We can safely
9642 // truncate because the immediate has been range checked and no valid
9643 // immediate requires more than a handful of bits.
9644 *Result = Result->extOrTrunc(32);
9645 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
9646 }
9647 }
9648
9649 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
9650 AArch64SVEIntrinsicsProvenSorted);
9651 SVETypeFlags TypeFlags(Builtin->TypeModifier);
9652 if (TypeFlags.isLoad())
9653 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
9654 TypeFlags.isZExtReturn());
9655 else if (TypeFlags.isStore())
9656 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
9657 else if (TypeFlags.isGatherLoad())
9658 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
9659 else if (TypeFlags.isScatterStore())
9660 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
9661 else if (TypeFlags.isPrefetch())
9662 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
9663 else if (TypeFlags.isGatherPrefetch())
9664 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
9665 else if (TypeFlags.isStructLoad())
9666 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
9667 else if (TypeFlags.isStructStore())
9668 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
9669 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
9670 return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
9671 else if (TypeFlags.isTupleCreate())
9672 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
9673 else if (TypeFlags.isUndef())
9674 return UndefValue::get(Ty);
9675 else if (Builtin->LLVMIntrinsic != 0) {
9676 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
9677 InsertExplicitZeroOperand(Builder, Ty, Ops);
9678
9679 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
9680 InsertExplicitUndefOperand(Builder, Ty, Ops);
9681
9682 // Some ACLE builtins leave out the argument to specify the predicate
9683 // pattern, which is expected to be expanded to an SV_ALL pattern.
9684 if (TypeFlags.isAppendSVALL())
9685 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
9686 if (TypeFlags.isInsertOp1SVALL())
9687 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
9688
9689 // Predicates must match the main datatype.
9690 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
9691 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
9692 if (PredTy->getElementType()->isIntegerTy(1))
9693 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
9694
9695 // Splat scalar operand to vector (intrinsics with _n infix)
9696 if (TypeFlags.hasSplatOperand()) {
9697 unsigned OpNo = TypeFlags.getSplatOperand();
9698 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
9699 }
9700
9701 if (TypeFlags.isReverseCompare())
9702 std::swap(Ops[1], Ops[2]);
9703 else if (TypeFlags.isReverseUSDOT())
9704 std::swap(Ops[1], Ops[2]);
9705 else if (TypeFlags.isReverseMergeAnyBinOp() &&
9706 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
9707 std::swap(Ops[1], Ops[2]);
9708 else if (TypeFlags.isReverseMergeAnyAccOp() &&
9709 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
9710 std::swap(Ops[1], Ops[3]);
9711
9712 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
9713 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
9714 llvm::Type *OpndTy = Ops[1]->getType();
9715 auto *SplatZero = Constant::getNullValue(OpndTy);
9716 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
9717 }
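// For example (a sketch, not verbatim IR): svadd_s32_z(pg, a, b) zeroes the
// inactive lanes of the first data operand up front,
//   %a.z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, zeroinitializer
// so the merging intrinsic call below yields zero in all inactive lanes.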
9718
9719 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
9720 getSVEOverloadTypes(TypeFlags, Ty, Ops));
9721 Value *Call = Builder.CreateCall(F, Ops);
9722
9723 // Predicate results must be converted to svbool_t.
9724 if (auto PredTy = dyn_cast<llvm::VectorType>(Call->getType()))
9725 if (PredTy->getScalarType()->isIntegerTy(1))
9726 Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
9727
9728 return Call;
9729 }
9730
9731 switch (BuiltinID) {
9732 default:
9733 return nullptr;
9734
9735 case SVE::BI__builtin_sve_svmov_b_z: {
9736 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
9737 SVETypeFlags TypeFlags(Builtin->TypeModifier);
9738 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
9739 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
9740 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
9741 }
9742
9743 case SVE::BI__builtin_sve_svnot_b_z: {
9744 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
9745 SVETypeFlags TypeFlags(Builtin->TypeModifier);
9746 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
9747 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
9748 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
9749 }
9750
9751 case SVE::BI__builtin_sve_svmovlb_u16:
9752 case SVE::BI__builtin_sve_svmovlb_u32:
9753 case SVE::BI__builtin_sve_svmovlb_u64:
9754 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
9755
9756 case SVE::BI__builtin_sve_svmovlb_s16:
9757 case SVE::BI__builtin_sve_svmovlb_s32:
9758 case SVE::BI__builtin_sve_svmovlb_s64:
9759 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
9760
9761 case SVE::BI__builtin_sve_svmovlt_u16:
9762 case SVE::BI__builtin_sve_svmovlt_u32:
9763 case SVE::BI__builtin_sve_svmovlt_u64:
9764 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
9765
9766 case SVE::BI__builtin_sve_svmovlt_s16:
9767 case SVE::BI__builtin_sve_svmovlt_s32:
9768 case SVE::BI__builtin_sve_svmovlt_s64:
9769 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
9770
9771 case SVE::BI__builtin_sve_svpmullt_u16:
9772 case SVE::BI__builtin_sve_svpmullt_u64:
9773 case SVE::BI__builtin_sve_svpmullt_n_u16:
9774 case SVE::BI__builtin_sve_svpmullt_n_u64:
9775 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
9776
9777 case SVE::BI__builtin_sve_svpmullb_u16:
9778 case SVE::BI__builtin_sve_svpmullb_u64:
9779 case SVE::BI__builtin_sve_svpmullb_n_u16:
9780 case SVE::BI__builtin_sve_svpmullb_n_u64:
9781 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
9782
9783 case SVE::BI__builtin_sve_svdup_n_b8:
9784 case SVE::BI__builtin_sve_svdup_n_b16:
9785 case SVE::BI__builtin_sve_svdup_n_b32:
9786 case SVE::BI__builtin_sve_svdup_n_b64: {
9787 Value *CmpNE =
9788 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
9789 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
9790 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
9791 return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
9792 }
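// A sketch of the result: svdup_n_b32(x) compares x against zero, splats the
// resulting i1 across <vscale x 4 x i1>, and reinterprets that as the full
// <vscale x 16 x i1> svbool_t, so any nonzero x produces an all-true predicate
// at b32 granularity.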
9793
9794 case SVE::BI__builtin_sve_svdupq_n_b8:
9795 case SVE::BI__builtin_sve_svdupq_n_b16:
9796 case SVE::BI__builtin_sve_svdupq_n_b32:
9797 case SVE::BI__builtin_sve_svdupq_n_b64:
9798 case SVE::BI__builtin_sve_svdupq_n_u8:
9799 case SVE::BI__builtin_sve_svdupq_n_s8:
9800 case SVE::BI__builtin_sve_svdupq_n_u64:
9801 case SVE::BI__builtin_sve_svdupq_n_f64:
9802 case SVE::BI__builtin_sve_svdupq_n_s64:
9803 case SVE::BI__builtin_sve_svdupq_n_u16:
9804 case SVE::BI__builtin_sve_svdupq_n_f16:
9805 case SVE::BI__builtin_sve_svdupq_n_bf16:
9806 case SVE::BI__builtin_sve_svdupq_n_s16:
9807 case SVE::BI__builtin_sve_svdupq_n_u32:
9808 case SVE::BI__builtin_sve_svdupq_n_f32:
9809 case SVE::BI__builtin_sve_svdupq_n_s32: {
9810 // These builtins are implemented by building a 128-bit fixed vector from
9811 // the scalar operands and replicating it across the register with dupq_lane.
9812 unsigned NumOpnds = Ops.size();
9813
9814 bool IsBoolTy =
9815 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
9816
9817 // For svdupq_n_b* the element type is an integer of width 128/numelts,
9818 // so that the compare can use the width that is natural for the expected
9819 // number of predicate lanes.
9820 llvm::Type *EltTy = Ops[0]->getType();
9821 if (IsBoolTy)
9822 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
9823
9824 SmallVector<Value *, 16> VecOps;
9825 for (unsigned I = 0; I < NumOpnds; ++I)
9826 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
9827 Value *Vec = BuildVector(VecOps);
9828
9829 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
9830 Value *InsertSubVec = Builder.CreateInsertVector(
9831 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0));
9832
9833 Function *F =
9834 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
9835 Value *DupQLane =
9836 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
9837
9838 if (!IsBoolTy)
9839 return DupQLane;
9840
9841 SVETypeFlags TypeFlags(Builtin->TypeModifier);
9842 Value *Pred = EmitSVEAllTruePred(TypeFlags);
9843
9844 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
9845 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
9846 : Intrinsic::aarch64_sve_cmpne_wide,
9847 OverloadedTy);
9848 Value *Call = Builder.CreateCall(
9849 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
9850 return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
9851 }
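// A sketch of the non-bool path: svdupq_n_s32(a, b, c, d) packs the operands
// into a <4 x i32>, inserts that at element 0 of an <vscale x 4 x i32>, and
// replicates it with
//   @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> %ins, i64 0)
// The bool variants additionally compare the replicated lanes against zero.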
9852
9853 case SVE::BI__builtin_sve_svpfalse_b:
9854 return ConstantInt::getFalse(Ty);
9855
9856 case SVE::BI__builtin_sve_svlen_bf16:
9857 case SVE::BI__builtin_sve_svlen_f16:
9858 case SVE::BI__builtin_sve_svlen_f32:
9859 case SVE::BI__builtin_sve_svlen_f64:
9860 case SVE::BI__builtin_sve_svlen_s8:
9861 case SVE::BI__builtin_sve_svlen_s16:
9862 case SVE::BI__builtin_sve_svlen_s32:
9863 case SVE::BI__builtin_sve_svlen_s64:
9864 case SVE::BI__builtin_sve_svlen_u8:
9865 case SVE::BI__builtin_sve_svlen_u16:
9866 case SVE::BI__builtin_sve_svlen_u32:
9867 case SVE::BI__builtin_sve_svlen_u64: {
9868 SVETypeFlags TF(Builtin->TypeModifier);
9869 auto VTy = cast<llvm::VectorType>(getSVEType(TF));
9870 auto *NumEls =
9871 llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());
9872
9873 Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
9874 return Builder.CreateMul(NumEls, Builder.CreateCall(F));
9875 }
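// A sketch: svlen never inspects its argument; svlen_s32(v) is simply
//   mul i64 4, (call i64 @llvm.vscale.i64())
// i.e. lanes per 128-bit granule times the runtime vector-length multiple.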
9876
9877 case SVE::BI__builtin_sve_svtbl2_u8:
9878 case SVE::BI__builtin_sve_svtbl2_s8:
9879 case SVE::BI__builtin_sve_svtbl2_u16:
9880 case SVE::BI__builtin_sve_svtbl2_s16:
9881 case SVE::BI__builtin_sve_svtbl2_u32:
9882 case SVE::BI__builtin_sve_svtbl2_s32:
9883 case SVE::BI__builtin_sve_svtbl2_u64:
9884 case SVE::BI__builtin_sve_svtbl2_s64:
9885 case SVE::BI__builtin_sve_svtbl2_f16:
9886 case SVE::BI__builtin_sve_svtbl2_bf16:
9887 case SVE::BI__builtin_sve_svtbl2_f32:
9888 case SVE::BI__builtin_sve_svtbl2_f64: {
9889 SVETypeFlags TF(Builtin->TypeModifier);
9890 auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
9891 Value *V0 = Builder.CreateExtractVector(VTy, Ops[0],
9892 ConstantInt::get(CGM.Int64Ty, 0));
9893 unsigned MinElts = VTy->getMinNumElements();
9894 Value *V1 = Builder.CreateExtractVector(
9895 VTy, Ops[0], ConstantInt::get(CGM.Int64Ty, MinElts));
9896 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
9897 return Builder.CreateCall(F, {V0, V1, Ops[1]});
9898 }
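// A sketch: the two registers of an svuint8x2_t table are recovered with
// @llvm.vector.extract at offsets 0 and 16 (MinElts for bytes), then looked up
// in one call to @llvm.aarch64.sve.tbl2.nxv16i8(%v0, %v1, %indices).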
9899
9900 case SVE::BI__builtin_sve_svset_neonq_s8:
9901 case SVE::BI__builtin_sve_svset_neonq_s16:
9902 case SVE::BI__builtin_sve_svset_neonq_s32:
9903 case SVE::BI__builtin_sve_svset_neonq_s64:
9904 case SVE::BI__builtin_sve_svset_neonq_u8:
9905 case SVE::BI__builtin_sve_svset_neonq_u16:
9906 case SVE::BI__builtin_sve_svset_neonq_u32:
9907 case SVE::BI__builtin_sve_svset_neonq_u64:
9908 case SVE::BI__builtin_sve_svset_neonq_f16:
9909 case SVE::BI__builtin_sve_svset_neonq_f32:
9910 case SVE::BI__builtin_sve_svset_neonq_f64:
9911 case SVE::BI__builtin_sve_svset_neonq_bf16: {
9912 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
9913 }
9914
9915 case SVE::BI__builtin_sve_svget_neonq_s8:
9916 case SVE::BI__builtin_sve_svget_neonq_s16:
9917 case SVE::BI__builtin_sve_svget_neonq_s32:
9918 case SVE::BI__builtin_sve_svget_neonq_s64:
9919 case SVE::BI__builtin_sve_svget_neonq_u8:
9920 case SVE::BI__builtin_sve_svget_neonq_u16:
9921 case SVE::BI__builtin_sve_svget_neonq_u32:
9922 case SVE::BI__builtin_sve_svget_neonq_u64:
9923 case SVE::BI__builtin_sve_svget_neonq_f16:
9924 case SVE::BI__builtin_sve_svget_neonq_f32:
9925 case SVE::BI__builtin_sve_svget_neonq_f64:
9926 case SVE::BI__builtin_sve_svget_neonq_bf16: {
9927 return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
9928 }
9929
9930 case SVE::BI__builtin_sve_svdup_neonq_s8:
9931 case SVE::BI__builtin_sve_svdup_neonq_s16:
9932 case SVE::BI__builtin_sve_svdup_neonq_s32:
9933 case SVE::BI__builtin_sve_svdup_neonq_s64:
9934 case SVE::BI__builtin_sve_svdup_neonq_u8:
9935 case SVE::BI__builtin_sve_svdup_neonq_u16:
9936 case SVE::BI__builtin_sve_svdup_neonq_u32:
9937 case SVE::BI__builtin_sve_svdup_neonq_u64:
9938 case SVE::BI__builtin_sve_svdup_neonq_f16:
9939 case SVE::BI__builtin_sve_svdup_neonq_f32:
9940 case SVE::BI__builtin_sve_svdup_neonq_f64:
9941 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
9942 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
9943 Builder.getInt64(0));
9944 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
9945 {Insert, Builder.getInt64(0)});
9946 }
9947 }
9948
9949 // Should not happen.
9950 return nullptr;
9951}
9952
9953Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
9954 const CallExpr *E) {
9955 // Find out if any arguments are required to be integer constant expressions.
9956 unsigned ICEArguments = 0;
9957 ASTContext::GetBuiltinTypeError Error;
9958 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
9959 assert(Error == ASTContext::GE_None && "Should not codegen an error");
9960
9961 llvm::Type *Ty = ConvertType(E->getType());
9962 llvm::SmallVector<Value *, 4> Ops;
9963 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
9964 if ((ICEArguments & (1 << i)) == 0)
9965 Ops.push_back(EmitScalarExpr(E->getArg(i)));
9966 else {
9967 // If this is required to be a constant, constant fold it so that we know
9968 // that the generated intrinsic gets a ConstantInt.
9969 std::optional<llvm::APSInt> Result =
9970 E->getArg(i)->getIntegerConstantExpr(getContext());
9971 assert(Result && "Expected argument to be a constant");
9972
9973 // Immediates for SVE llvm intrinsics are always 32-bit. We can safely
9974 // truncate because the immediate has been range checked and no valid
9975 // immediate requires more than a handful of bits.
9976 *Result = Result->extOrTrunc(32);
9977 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
9978 }
9979 }
9980
9981 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
9982 AArch64SMEIntrinsicsProvenSorted);
9983 SVETypeFlags TypeFlags(Builtin->TypeModifier);
9984 if (TypeFlags.isLoad() || TypeFlags.isStore())
9985 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
9986 else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
9987 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
9988 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
9989 BuiltinID == SME::BI__builtin_sme_svzero_za)
9990 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
9991 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
9992 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za)
9993 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
9994 else if (Builtin->LLVMIntrinsic != 0) {
9995 // Predicates must match the main datatype.
9996 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
9997 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
9998 if (PredTy->getElementType()->isIntegerTy(1))
9999 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10000
10001 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
10002 getSVEOverloadTypes(TypeFlags, Ty, Ops));
10003 Value *Call = Builder.CreateCall(F, Ops);
10004 return Call;
10005 }
10006
10007 // Should not happen.
10008 return nullptr;
10009}
10010
10011Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
10012 const CallExpr *E,
10013 llvm::Triple::ArchType Arch) {
10014 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
10015 BuiltinID <= clang::AArch64::LastSVEBuiltin)
10016 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
10017
10018 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
10019 BuiltinID <= clang::AArch64::LastSMEBuiltin)
10020 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
10021
10022 unsigned HintID = static_cast<unsigned>(-1);
10023 switch (BuiltinID) {
10024 default: break;
10025 case clang::AArch64::BI__builtin_arm_nop:
10026 HintID = 0;
10027 break;
10028 case clang::AArch64::BI__builtin_arm_yield:
10029 case clang::AArch64::BI__yield:
10030 HintID = 1;
10031 break;
10032 case clang::AArch64::BI__builtin_arm_wfe:
10033 case clang::AArch64::BI__wfe:
10034 HintID = 2;
10035 break;
10036 case clang::AArch64::BI__builtin_arm_wfi:
10037 case clang::AArch64::BI__wfi:
10038 HintID = 3;
10039 break;
10040 case clang::AArch64::BI__builtin_arm_sev:
10041 case clang::AArch64::BI__sev:
10042 HintID = 4;
10043 break;
10044 case clang::AArch64::BI__builtin_arm_sevl:
10045 case clang::AArch64::BI__sevl:
10046 HintID = 5;
10047 break;
10048 }
10049
10050 if (HintID != static_cast<unsigned>(-1)) {
10051 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
10052 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
10053 }
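// A sketch: __wfi() therefore becomes
//   call void @llvm.aarch64.hint(i32 3)
// and the other spin/event builtins differ only in the hint immediate.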
10054
10055 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
10056 assert((getContext().getTypeSize(E->getType()) == 32) &&
10057 "rbit of unusual size!");
10058 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10059 return Builder.CreateCall(
10060 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
10061 }
10062 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
10063 assert((getContext().getTypeSize(E->getType()) == 64) &&
10064 "rbit of unusual size!");
10065 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10066 return Builder.CreateCall(
10067 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
10068 }
10069
10070 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
10071 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
10072 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10073 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
10074 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
10075 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
10076 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
10077 return Res;
10078 }
10079
10080 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
10081 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10082 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
10083 "cls");
10084 }
10085 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
10086 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10087 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
10088 "cls");
10089 }
10090
10091 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
10092 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
10093 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10094 llvm::Type *Ty = Arg->getType();
10095 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
10096 Arg, "frint32z");
10097 }
10098
10099 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
10100 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
10101 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10102 llvm::Type *Ty = Arg->getType();
10103 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
10104 Arg, "frint64z");
10105 }
10106
10107 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
10108 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
10109 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10110 llvm::Type *Ty = Arg->getType();
10111 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
10112 Arg, "frint32x");
10113 }
10114
10115 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
10116 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
10117 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10118 llvm::Type *Ty = Arg->getType();
10119 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
10120 Arg, "frint64x");
10121 }
10122
10123 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
10124 assert((getContext().getTypeSize(E->getType()) == 32) &&
10125 "__jcvt of unusual size!");
10126 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10127 return Builder.CreateCall(
10128 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
10129 }
10130
10131 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
10132 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
10133 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
10134 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
10135 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
10136 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
10137
10138 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
10139 // Load from the address via an LLVM intrinsic, receiving a
10140 // tuple of 8 i64 words, and store each one to ValPtr.
10141 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
10142 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
10143 llvm::Value *ToRet;
10144 for (size_t i = 0; i < 8; i++) {
10145 llvm::Value *ValOffsetPtr =
10146 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
10147 Address Addr =
10148 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
10149 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
10150 }
10151 return ToRet;
10152 } else {
10153 // Load 8 i64 words from ValPtr, and store them to the address
10154 // via an LLVM intrinsic.
10155 SmallVector<llvm::Value *, 9> Args;
10156 Args.push_back(MemAddr);
10157 for (size_t i = 0; i < 8; i++) {
10158 llvm::Value *ValOffsetPtr =
10159 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
10160 Address Addr =
10161 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
10162 Args.push_back(Builder.CreateLoad(Addr));
10163 }
10164
10165 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
10166 ? Intrinsic::aarch64_st64b
10167 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
10168 ? Intrinsic::aarch64_st64bv
10169 : Intrinsic::aarch64_st64bv0);
10170 Function *F = CGM.getIntrinsic(Intr);
10171 return Builder.CreateCall(F, Args);
10172 }
10173 }
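// A sketch (using the ACLE names, not verbatim IR): __arm_ld64b(addr, data)
// performs one 64-byte single-copy atomic load whose eight i64 results are
// stored to data[0..7] in turn; the __arm_st64b family runs the loop in the
// opposite direction and passes the eight words straight to the intrinsic.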
10174
10175 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
10176 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
10177
10178 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
10179 ? Intrinsic::aarch64_rndr
10180 : Intrinsic::aarch64_rndrrs);
10181 Function *F = CGM.getIntrinsic(Intr);
10182 llvm::Value *Val = Builder.CreateCall(F);
10183 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
10184 Value *Status = Builder.CreateExtractValue(Val, 1);
10185
10186 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
10187 Builder.CreateStore(RandomValue, MemAddress);
10188 Status = Builder.CreateZExt(Status, Int32Ty);
10189 return Status;
10190 }
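// A sketch: for uint64_t v; int status = __builtin_arm_rndr(&v); the intrinsic
// returns an {i64, i1} pair; the i64 random value is stored through the
// pointer argument and the zero-extended i1 status flag becomes the builtin's
// return value.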
10191
10192 if (BuiltinID == clang::AArch64::BI__clear_cache) {
10193 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
10194 const FunctionDecl *FD = E->getDirectCallee();
10195 Value *Ops[2];
10196 for (unsigned i = 0; i < 2; i++)
10197 Ops[i] = EmitScalarExpr(E->getArg(i));
10198 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
10199 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
10200 StringRef Name = FD->getName();
10201 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
10202 }
10203
10204 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
10205 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
10206 getContext().getTypeSize(E->getType()) == 128) {
10207 Function *F =
10208 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
10209 ? Intrinsic::aarch64_ldaxp
10210 : Intrinsic::aarch64_ldxp);
10211
10212 Value *LdPtr = EmitScalarExpr(E->getArg(0));
10213 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
10214 "ldxp");
10215
10216 Value *Val0 = Builder.CreateExtractValue(Val, 1);
10217 Value *Val1 = Builder.CreateExtractValue(Val, 0);
10218 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
10219 Val0 = Builder.CreateZExt(Val0, Int128Ty);
10220 Val1 = Builder.CreateZExt(Val1, Int128Ty);
10221
10222 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
10223 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
10224 Val = Builder.CreateOr(Val, Val1);
10225 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
10226 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
10227 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
10228 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
10229
10230 QualType Ty = E->getType();
10231 llvm::Type *RealResTy = ConvertType(Ty);
10232 llvm::Type *IntTy =
10233 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
10234 llvm::Type *PtrTy = llvm::PointerType::getUnqual(getLLVMContext());
10235
10236 Function *F =
10237 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
10238 ? Intrinsic::aarch64_ldaxr
10239 : Intrinsic::aarch64_ldxr,
10240 PtrTy);
10241 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
10242 Val->addParamAttr(
10243 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
10244
10245 if (RealResTy->isPointerTy())
10246 return Builder.CreateIntToPtr(Val, RealResTy);
10247
10248 llvm::Type *IntResTy = llvm::IntegerType::get(
10249 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
10250 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
10251 RealResTy);
10252 }
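// A sketch of the 128-bit path above: ldxp/ldaxp returns a pair of i64s that
// are reassembled as
//   %r = or i128 (shl nuw (zext %hi to i128), 64), (zext %lo to i128)
// where %hi is extractvalue index 1, before the bitcast to the user's type.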
10253
10254 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
10255 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
10256 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
10257 Function *F =
10258 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
10259 ? Intrinsic::aarch64_stlxp
10260 : Intrinsic::aarch64_stxp);
10261 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
10262
10263 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
10264 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
10265
10266 Tmp = Tmp.withElementType(STy);
10267 llvm::Value *Val = Builder.CreateLoad(Tmp);
10268
10269 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
10270 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
10271 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
10272 Int8PtrTy);
10273 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
10274 }
10275
10276 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
10277 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
10278 Value *StoreVal = EmitScalarExpr(E->getArg(0));
10279 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
10280
10281 QualType Ty = E->getArg(0)->getType();
10282 llvm::Type *StoreTy =
10283 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
10284
10285 if (StoreVal->getType()->isPointerTy())
10286 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
10287 else {
10288 llvm::Type *IntTy = llvm::IntegerType::get(
10289 getLLVMContext(),
10290 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
10291 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
10292 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
10293 }
10294
10295 Function *F =
10296 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
10297 ? Intrinsic::aarch64_stlxr
10298 : Intrinsic::aarch64_stxr,
10299 StoreAddr->getType());
10300 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
10301 CI->addParamAttr(
10302 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
10303 return CI;
10304 }
10305
10306 if (BuiltinID == clang::AArch64::BI__getReg) {
10307 Expr::EvalResult Result;
10308 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
10309 llvm_unreachable("Sema will ensure that the parameter is constant");
10310
10311 llvm::APSInt Value = Result.Val.getInt();
10312 LLVMContext &Context = CGM.getLLVMContext();
10313 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
10314
10315 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
10316 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
10317 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
10318
10319 llvm::Function *F =
10320 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
10321 return Builder.CreateCall(F, Metadata);
10322 }
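// A sketch: __getReg(29) evaluates the constant, builds the metadata string
// "x29" ("sp" for register number 31), and emits
//   call i64 @llvm.read_register.i64(metadata !{!"x29"})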
10323
10324 if (BuiltinID == clang::AArch64::BI__break) {
10325 Expr::EvalResult Result;
10326 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
10327 llvm_unreachable("Sema will ensure that the parameter is constant");
10328
10329 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break);
10330 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
10331 }
10332
10333 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
10334 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
10335 return Builder.CreateCall(F);
10336 }
10337
10338 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
10339 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
10340 llvm::SyncScope::SingleThread);
10341
10342 // CRC32
10343 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
10344 switch (BuiltinID) {
10345 case clang::AArch64::BI__builtin_arm_crc32b:
10346 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
10347 case clang::AArch64::BI__builtin_arm_crc32cb:
10348 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
10349 case clang::AArch64::BI__builtin_arm_crc32h:
10350 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
10351 case clang::AArch64::BI__builtin_arm_crc32ch:
10352 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
10353 case clang::AArch64::BI__builtin_arm_crc32w:
10354 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
10355 case clang::AArch64::BI__builtin_arm_crc32cw:
10356 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
10357 case clang::AArch64::BI__builtin_arm_crc32d:
10358 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
10359 case clang::AArch64::BI__builtin_arm_crc32cd:
10360 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
10361 }
10362
10363 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
10364 Value *Arg0 = EmitScalarExpr(E->getArg(0));
10365 Value *Arg1 = EmitScalarExpr(E->getArg(1));
10366 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
10367
10368 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
10369 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
10370
10371 return Builder.CreateCall(F, {Arg0, Arg1});
10372 }
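// A sketch: __builtin_arm_crc32h(acc, data) zero-extends the 16-bit data
// operand to the i32 parameter of @llvm.aarch64.crc32h before the call; note
// that the 64-bit builtins map onto the crc32x/crc32cx intrinsics above.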
10373
10374 // Memory Operations (MOPS)
10375 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
10376 Value *Dst = EmitScalarExpr(E->getArg(0));
10377 Value *Val = EmitScalarExpr(E->getArg(1));
10378 Value *Size = EmitScalarExpr(E->getArg(2));
10379 Dst = Builder.CreatePointerCast(Dst, Int8PtrTy);
10380 Val = Builder.CreateTrunc(Val, Int8Ty);
10381 Size = Builder.CreateIntCast(Size, Int64Ty, false);
10382 return Builder.CreateCall(
10383 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
10384 }
10385
10386 // Memory Tagging Extensions (MTE) Intrinsics
10387 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
10388 switch (BuiltinID) {
10389 case clang::AArch64::BI__builtin_arm_irg:
10390 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
10391 case clang::AArch64::BI__builtin_arm_addg:
10392 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
10393 case clang::AArch64::BI__builtin_arm_gmi:
10394 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
10395 case clang::AArch64::BI__builtin_arm_ldg:
10396 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
10397 case clang::AArch64::BI__builtin_arm_stg:
10398 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
10399 case clang::AArch64::BI__builtin_arm_subp:
10400 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
10401 }
10402
10403 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
10404 llvm::Type *T = ConvertType(E->getType());
10405
10406 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
10407 Value *Pointer = EmitScalarExpr(E->getArg(0));
10408 Value *Mask = EmitScalarExpr(E->getArg(1));
10409
10410 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
10411 Mask = Builder.CreateZExt(Mask, Int64Ty);
10412 Value *RV = Builder.CreateCall(
10413 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, Mask});
10414 return Builder.CreatePointerCast(RV, T);
10415 }
10416 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
10417 Value *Pointer = EmitScalarExpr(E->getArg(0));
10418 Value *TagOffset = EmitScalarExpr(E->getArg(1));
10419
10420 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
10421 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
10422 Value *RV = Builder.CreateCall(
10423 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, TagOffset});
10424 return Builder.CreatePointerCast(RV, T);
10425 }
10426 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
10427 Value *Pointer = EmitScalarExpr(E->getArg(0));
10428 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
10429
10430 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
10431 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
10432 return Builder.CreateCall(
10433 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
10434 }
10435 // Although it is possible to supply a different return
10436 // address (first arg) to this intrinsic, for now we set
10437 // the return address to be the same as the input address.
10438 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
10439 Value *TagAddress = EmitScalarExpr(E->getArg(0));
10440 TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
10441 Value *RV = Builder.CreateCall(
10442 CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
10443 return Builder.CreatePointerCast(RV, T);
10444 }
10445 // Although it is possible to supply a different tag (to set)
10446 // to this intrinsic (as first arg), for now we supply
10447 // the tag that is in the input address arg (the common use case).
10448 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
10449 Value *TagAddress = EmitScalarExpr(E->getArg(0));
10450 TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
10451 return Builder.CreateCall(
10452 CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
10453 }
10454 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
10455 Value *PointerA = EmitScalarExpr(E->getArg(0));
10456 Value *PointerB = EmitScalarExpr(E->getArg(1));
10457 PointerA = Builder.CreatePointerCast(PointerA, Int8PtrTy);
10458 PointerB = Builder.CreatePointerCast(PointerB, Int8PtrTy);
10459 return Builder.CreateCall(
10460 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
10461 }
10462 }
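// A sketch of the irg case: int *q = __builtin_arm_irg(p, m) becomes roughly
//   %r = call ptr @llvm.aarch64.irg(ptr %p, i64 %m.zext)
// bracketed by pointer casts so that q keeps the source-level pointer type.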
10463
10464 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
10465 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
10466 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
10467 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
10468 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
10469 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
10470 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
10471 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
10472
10473 SpecialRegisterAccessKind AccessKind = Write;
10474 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
10475 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
10476 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
10477 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
10478 AccessKind = VolatileRead;
10479
10480 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
10481 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
10482
10483 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
10484 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
10485
10486 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
10487 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
10488
10489 llvm::Type *ValueType;
10490 llvm::Type *RegisterType = Int64Ty;
10491 if (Is32Bit) {
10492 ValueType = Int32Ty;
10493 } else if (Is128Bit) {
10494 llvm::Type *Int128Ty =
10495 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
10496 ValueType = Int128Ty;
10497 RegisterType = Int128Ty;
10498 } else if (IsPointerBuiltin) {
10499 ValueType = VoidPtrTy;
10500 } else {
10501 ValueType = Int64Ty;
10502 }
10503
10504 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
10505 AccessKind);
10506 }
10507
10508 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
10509 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
10510 LLVMContext &Context = CGM.getLLVMContext();
10511
10512 unsigned SysReg =
10513 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
10514
10515 std::string SysRegStr;
10516 llvm::raw_string_ostream(SysRegStr) <<
10517 ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
10518 ((SysReg >> 11) & 7) << ":" <<
10519 ((SysReg >> 7) & 15) << ":" <<
10520 ((SysReg >> 3) & 15) << ":" <<
10521 ( SysReg & 7);
10522
10523 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
10524 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
10525 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
10526
10527 llvm::Type *RegisterType = Int64Ty;
10528 llvm::Type *Types[] = { RegisterType };
10529
10530 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
10531 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
10532
10533 return Builder.CreateCall(F, Metadata);
10534 }
10535
10536 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
10537 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
10538
10539 return Builder.CreateCall(F, { Metadata, ArgValue });
10540 }
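// A worked example of the unpacking above: _ReadStatusReg(0x5F02), the
// CNTVCT_EL0 encoding (op0=3, op1=3, CRn=14, CRm=0, op2=2), produces the
// metadata string "3:3:14:0:2" and emits
//   call i64 @llvm.read_register.i64(metadata !{!"3:3:14:0:2"})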
10541
10542 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
10543 llvm::Function *F =
10544 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
10545 return Builder.CreateCall(F);
10546 }
10547
10548 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
10549 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
10550 return Builder.CreateCall(F);
10551 }
10552
10553 if (BuiltinID == clang::AArch64::BI__mulh ||
10554 BuiltinID == clang::AArch64::BI__umulh) {
10555 llvm::Type *ResType = ConvertType(E->getType());
10556 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
10557
10558 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
10559 Value *LHS =
10560 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
10561 Value *RHS =
10562 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
10563
10564 Value *MulResult, *HigherBits;
10565 if (IsSigned) {
10566 MulResult = Builder.CreateNSWMul(LHS, RHS);
10567 HigherBits = Builder.CreateAShr(MulResult, 64);
10568 } else {
10569 MulResult = Builder.CreateNUWMul(LHS, RHS);
10570 HigherBits = Builder.CreateLShr(MulResult, 64);
10571 }
10572 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
10573
10574 return HigherBits;
10575 }
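// A sketch: __mulh(a, b) sign-extends both operands to i128 and keeps the top
// half of the product:
//   %m = mul nsw i128 %lhs, %rhs
//   %h = trunc i128 (ashr i128 %m, 64) to i64
// __umulh is identical with zext and lshr instead.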
10576
10577 if (BuiltinID == AArch64::BI__writex18byte ||
10578 BuiltinID == AArch64::BI__writex18word ||
10579 BuiltinID == AArch64::BI__writex18dword ||
10580 BuiltinID == AArch64::BI__writex18qword) {
10581 llvm::Type *IntTy = ConvertType(E->getArg(1)->getType());
10582
10583 // Read x18 as i8*
10584 LLVMContext &Context = CGM.getLLVMContext();
10585 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
10586 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
10587 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
10588 llvm::Function *F =
10589 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
10590 llvm::Value *X18 = Builder.CreateCall(F, Metadata);
10591 X18 = Builder.CreateIntToPtr(X18, llvm::PointerType::get(Int8Ty, 0));
10592
10593 // Store val at x18 + offset
10594 Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
10595 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
10596 Ptr = Builder.CreatePointerCast(Ptr, llvm::PointerType::get(IntTy, 0));
10597 Value *Val = EmitScalarExpr(E->getArg(1));
10598 StoreInst *Store = Builder.CreateAlignedStore(Val, Ptr, CharUnits::One());
10599 return Store;
10600 }
10601
10602 if (BuiltinID == AArch64::BI__readx18byte ||
10603 BuiltinID == AArch64::BI__readx18word ||
10604 BuiltinID == AArch64::BI__readx18dword ||
10605 BuiltinID == AArch64::BI__readx18qword) {
10606 llvm::Type *IntTy = ConvertType(E->getType());
10607
10608 // Read x18 as i8*
10609 LLVMContext &Context = CGM.getLLVMContext();
10610 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
10611 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
10612 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
10613 llvm::Function *F =
10614 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
10615 llvm::Value *X18 = Builder.CreateCall(F, Metadata);
10616 X18 = Builder.CreateIntToPtr(X18, llvm::PointerType::get(Int8Ty, 0));
10617
10618 // Load x18 + offset
10619 Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
10620 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
10621 Ptr = Builder.CreatePointerCast(Ptr, llvm::PointerType::get(IntTy, 0));
10622 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
10623 return Load;
10624 }
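// A sketch: __readx18dword(off) reads x18 once via
// @llvm.read_register.i64(!"x18"), GEPs the byte offset onto it, and performs
// an align-1 load of the 32-bit value, mirroring the __writex18* store path.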
10625
10626 // Handle MSVC intrinsics before argument evaluation to prevent double
10627 // evaluation.
10628 if (std::optional<MSVCIntrin> MsvcIntId =
10629 translateAarch64ToMsvcIntrin(BuiltinID))
10630 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
10631
10632 // Some intrinsics are equivalent; if they are, use the base intrinsic ID.
10633 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
10634 return P.first == BuiltinID;
10635 });
10636 if (It != end(NEONEquivalentIntrinsicMap))
10637 BuiltinID = It->second;
10638
10639 // Find out if any arguments are required to be integer constant
10640 // expressions.
10641 unsigned ICEArguments = 0;
10642 ASTContext::GetBuiltinTypeError Error;
10643 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
10644 assert(Error == ASTContext::GE_None && "Should not codegen an error");
10645
10646 llvm::SmallVector<Value *, 4> Ops;
10647 Address PtrOp0 = Address::invalid();
10648 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
10649 if (i == 0) {
10650 switch (BuiltinID) {
10651 case NEON::BI__builtin_neon_vld1_v:
10652 case NEON::BI__builtin_neon_vld1q_v:
10653 case NEON::BI__builtin_neon_vld1_dup_v:
10654 case NEON::BI__builtin_neon_vld1q_dup_v:
10655 case NEON::BI__builtin_neon_vld1_lane_v:
10656 case NEON::BI__builtin_neon_vld1q_lane_v:
10657 case NEON::BI__builtin_neon_vst1_v:
10658 case NEON::BI__builtin_neon_vst1q_v:
10659 case NEON::BI__builtin_neon_vst1_lane_v:
10660 case NEON::BI__builtin_neon_vst1q_lane_v:
10661 case NEON::BI__builtin_neon_vldap1_lane_s64:
10662 case NEON::BI__builtin_neon_vldap1q_lane_s64:
10663 case NEON::BI__builtin_neon_vstl1_lane_s64:
10664 case NEON::BI__builtin_neon_vstl1q_lane_s64:
10665 // Get the alignment for the argument in addition to the value;
10666 // we'll use it later.
10667 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
10668 Ops.push_back(PtrOp0.getPointer());
10669 continue;
10670 }
10671 }
10672 if ((ICEArguments & (1 << i)) == 0) {
10673 Ops.push_back(EmitScalarExpr(E->getArg(i)));
10674 } else {
10675 // If this is required to be a constant, constant fold it so that we know
10676 // that the generated intrinsic gets a ConstantInt.
10677 Ops.push_back(llvm::ConstantInt::get(
10678 getLLVMContext(),
10679 *E->getArg(i)->getIntegerConstantExpr(getContext())));
10680 }
10681 }
10682
10683 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
10684 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
10685 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
10686
10687 if (Builtin) {
10688 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
10689 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
10690 assert(Result && "SISD intrinsic should have been handled");
10691 return Result;
10692 }
10693
10694 const Expr *Arg = E->getArg(E->getNumArgs()-1);
10695 NeonTypeFlags Type(0);
10696 if (std::optional<llvm::APSInt> Result =
10697 Arg->getIntegerConstantExpr(getContext()))
10698 // Determine the type of this overloaded NEON intrinsic.
10699 Type = NeonTypeFlags(Result->getZExtValue());
10700
10701 bool usgn = Type.isUnsigned();
10702 bool quad = Type.isQuad();
10703
10704 // Handle non-overloaded intrinsics first.
10705 switch (BuiltinID) {
10706 default: break;
10707 case NEON::BI__builtin_neon_vabsh_f16:
10708 Ops.push_back(EmitScalarExpr(E->getArg(0)));
10709 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
10710 case NEON::BI__builtin_neon_vaddq_p128: {
10711 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
10712 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10713 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
10714 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
10715 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
10716 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
10717 return Builder.CreateBitCast(Ops[0], Int128Ty);
10718 }
10719 case NEON::BI__builtin_neon_vldrq_p128: {
10720 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
10721 llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
10722 Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
10723 return Builder.CreateAlignedLoad(Int128Ty, Ptr,
10724 CharUnits::fromQuantity(16));
10725 }
10726 case NEON::BI__builtin_neon_vstrq_p128: {
10727 llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
10728 Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
10729 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
10730 }
10731 case NEON::BI__builtin_neon_vcvts_f32_u32:
10732 case NEON::BI__builtin_neon_vcvtd_f64_u64:
10733 usgn = true;
10734 [[fallthrough]];
10735 case NEON::BI__builtin_neon_vcvts_f32_s32:
10736 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
10737 Ops.push_back(EmitScalarExpr(E->getArg(0)));
10738 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
10739 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
10740 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
10741 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
10742 if (usgn)
10743 return Builder.CreateUIToFP(Ops[0], FTy);
10744 return Builder.CreateSIToFP(Ops[0], FTy);
10745 }
10746 case NEON::BI__builtin_neon_vcvth_f16_u16:
10747 case NEON::BI__builtin_neon_vcvth_f16_u32:
10748 case NEON::BI__builtin_neon_vcvth_f16_u64:
10749 usgn = true;
10750 [[fallthrough]];
10751 case NEON::BI__builtin_neon_vcvth_f16_s16:
10752 case NEON::BI__builtin_neon_vcvth_f16_s32:
10753 case NEON::BI__builtin_neon_vcvth_f16_s64: {
10754 Ops.push_back(EmitScalarExpr(E->getArg(0)));
10755 llvm::Type *FTy = HalfTy;
10756 llvm::Type *InTy;
10757 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
10758 InTy = Int64Ty;
10759 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
10760 InTy = Int32Ty;
10761 else
10762 InTy = Int16Ty;
10763 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
10764 if (usgn)
10765 return Builder.CreateUIToFP(Ops[0], FTy);
10766 return Builder.CreateSIToFP(Ops[0], FTy);
10767 }
10768 case NEON::BI__builtin_neon_vcvtah_u16_f16:
10769 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
10770 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
10771 case NEON::BI__builtin_neon_vcvtph_u16_f16:
10772 case NEON::BI__builtin_neon_vcvth_u16_f16:
10773 case NEON::BI__builtin_neon_vcvtah_s16_f16:
10774 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
10775 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
10776 case NEON::BI__builtin_neon_vcvtph_s16_f16:
10777 case NEON::BI__builtin_neon_vcvth_s16_f16: {
10778 unsigned Int;
10779 llvm::Type* InTy = Int32Ty;
10780 llvm::Type* FTy = HalfTy;
10781 llvm::Type *Tys[2] = {InTy, FTy};
10782 Ops.push_back(EmitScalarExpr(E->getArg(0)));
10783 switch (BuiltinID) {
10784 default: llvm_unreachable("missing builtin ID in switch!");
10785 case NEON::BI__builtin_neon_vcvtah_u16_f16:
10786 Int = Intrinsic::aarch64_neon_fcvtau; break;
10787 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
10788 Int = Intrinsic::aarch64_neon_fcvtmu; break;
10789 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
10790 Int = Intrinsic::aarch64_neon_fcvtnu; break;
10791 case NEON::BI__builtin_neon_vcvtph_u16_f16:
10792 Int = Intrinsic::aarch64_neon_fcvtpu; break;
10793 case NEON::BI__builtin_neon_vcvth_u16_f16:
10794 Int = Intrinsic::aarch64_neon_fcvtzu; break;
10795 case NEON::BI__builtin_neon_vcvtah_s16_f16:
10796 Int = Intrinsic::aarch64_neon_fcvtas; break;
10797 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
10798 Int = Intrinsic::aarch64_neon_fcvtms; break;
10799 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
10800 Int = Intrinsic::aarch64_neon_fcvtns; break;
10801 case NEON::BI__builtin_neon_vcvtph_s16_f16:
10802 Int = Intrinsic::aarch64_neon_fcvtps; break;
10803 case NEON::BI__builtin_neon_vcvth_s16_f16:
10804 Int = Intrinsic::aarch64_neon_fcvtzs; break;
10805 }
10806 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
10807 return Builder.CreateTrunc(Ops[0], Int16Ty);
10808 }
10809 case NEON::BI__builtin_neon_vcaleh_f16:
10810 case NEON::BI__builtin_neon_vcalth_f16:
10811 case NEON::BI__builtin_neon_vcageh_f16:
10812 case NEON::BI__builtin_neon_vcagth_f16: {
10813 unsigned Int;
10814 llvm::Type* InTy = Int32Ty;
10815 llvm::Type* FTy = HalfTy;
10816 llvm::Type *Tys[2] = {InTy, FTy};
10817 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10818 switch (BuiltinID) {
10819 default: llvm_unreachable("missing builtin ID in switch!");
10820 case NEON::BI__builtin_neon_vcageh_f16:
10821 Int = Intrinsic::aarch64_neon_facge; break;
10822 case NEON::BI__builtin_neon_vcagth_f16:
10823 Int = Intrinsic::aarch64_neon_facgt; break;
10824 case NEON::BI__builtin_neon_vcaleh_f16:
10825 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
10826 case NEON::BI__builtin_neon_vcalth_f16:
10827 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
10828 }
10829 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
10830 return Builder.CreateTrunc(Ops[0], Int16Ty);
10831 }
10832 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
10833 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
10834 unsigned Int;
10835 llvm::Type* InTy = Int32Ty;
10836 llvm::Type* FTy = HalfTy;
10837 llvm::Type *Tys[2] = {InTy, FTy};
10838 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10839 switch (BuiltinID) {
10840 default: llvm_unreachable("missing builtin ID in switch!");
10841 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
10842 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
10843 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
10844 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
10845 }
10846 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
10847 return Builder.CreateTrunc(Ops[0], Int16Ty);
10848 }
10849 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
10850 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
10851 unsigned Int;
10852 llvm::Type* FTy = HalfTy;
10853 llvm::Type* InTy = Int32Ty;
10854 llvm::Type *Tys[2] = {FTy, InTy};
10855 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10856 switch (BuiltinID) {
10857 default: llvm_unreachable("missing builtin ID in switch!");
10858 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
10859 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
10860 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
10861 break;
10862 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
10863 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
10864 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
10865 break;
10866 }
10867 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
10868 }
10869 case NEON::BI__builtin_neon_vpaddd_s64: {
10870 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
10871 Value *Vec = EmitScalarExpr(E->getArg(0));
10872 // The vector is v2i64, so make sure it's bitcast to that.
10873 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
10874 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
10875 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
10876 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
10877 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
10878 // Pairwise addition of a v2i64 into a scalar i64.
10879 return Builder.CreateAdd(Op0, Op1, "vpaddd");
10880 }
10881 case NEON::BI__builtin_neon_vpaddd_f64: {
10882 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
10883 Value *Vec = EmitScalarExpr(E->getArg(0));
10884 // The vector is v2f64, so make sure it's bitcast to that.
10885 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
10886 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
10887 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
10888 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
10889 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
10890 // Pairwise addition of a v2f64 into a scalar f64.
10891 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
10892 }
10893 case NEON::BI__builtin_neon_vpadds_f32: {
10894 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
10895 Value *Vec = EmitScalarExpr(E->getArg(0));
10896 // The vector is v2f32, so make sure it's bitcast to that.
10897 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
10898 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
10899 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
10900 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
10901 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
10902 // Pairwise addition of a v2f32 into a scalar f32.
10903 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
10904 }
10905 case NEON::BI__builtin_neon_vceqzd_s64:
10906 case NEON::BI__builtin_neon_vceqzd_f64:
10907 case NEON::BI__builtin_neon_vceqzs_f32:
10908 case NEON::BI__builtin_neon_vceqzh_f16:
10909 Ops.push_back(EmitScalarExpr(E->getArg(0)));
10910 return EmitAArch64CompareBuiltinExpr(
10911 Ops[0], ConvertType(E->getType()),
10912 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
10913 case NEON::BI__builtin_neon_vcgezd_s64:
10914 case NEON::BI__builtin_neon_vcgezd_f64:
10915 case NEON::BI__builtin_neon_vcgezs_f32:
10916 case NEON::BI__builtin_neon_vcgezh_f16:
10917 Ops.push_back(EmitScalarExpr(E->getArg(0)));
10918 return EmitAArch64CompareBuiltinExpr(
10919 Ops[0], ConvertType(E->getType()),
10920 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
10921 case NEON::BI__builtin_neon_vclezd_s64:
10922 case NEON::BI__builtin_neon_vclezd_f64:
10923 case NEON::BI__builtin_neon_vclezs_f32:
10924 case NEON::BI__builtin_neon_vclezh_f16:
10925 Ops.push_back(EmitScalarExpr(E->getArg(0)));
10926 return EmitAArch64CompareBuiltinExpr(
10927 Ops[0], ConvertType(E->getType()),
10928 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
10929 case NEON::BI__builtin_neon_vcgtzd_s64:
10930 case NEON::BI__builtin_neon_vcgtzd_f64:
10931 case NEON::BI__builtin_neon_vcgtzs_f32:
10932 case NEON::BI__builtin_neon_vcgtzh_f16:
10933 Ops.push_back(EmitScalarExpr(E->getArg(0)));
10934 return EmitAArch64CompareBuiltinExpr(
10935 Ops[0], ConvertType(E->getType()),
10936 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
10937 case NEON::BI__builtin_neon_vcltzd_s64:
10938 case NEON::BI__builtin_neon_vcltzd_f64:
10939 case NEON::BI__builtin_neon_vcltzs_f32:
10940 case NEON::BI__builtin_neon_vcltzh_f16:
10941 Ops.push_back(EmitScalarExpr(E->getArg(0)));
10942 return EmitAArch64CompareBuiltinExpr(
10943 Ops[0], ConvertType(E->getType()),
10944 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
10945
10946 case NEON::BI__builtin_neon_vceqzd_u64: {
10947 Ops.push_back(EmitScalarExpr(E->getArg(0)));
10948 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
10949 Ops[0] =
10950 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
10951 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
10952 }
10953 case NEON::BI__builtin_neon_vceqd_f64:
10954 case NEON::BI__builtin_neon_vcled_f64:
10955 case NEON::BI__builtin_neon_vcltd_f64:
10956 case NEON::BI__builtin_neon_vcged_f64:
10957 case NEON::BI__builtin_neon_vcgtd_f64: {
10958 llvm::CmpInst::Predicate P;
10959 switch (BuiltinID) {
10960 default: llvm_unreachable("missing builtin ID in switch!");
10961 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
10962 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
10963 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
10964 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
10965 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
10966 }
10967 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10968 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
10969 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
10970 if (P == llvm::FCmpInst::FCMP_OEQ)
10971 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
10972 else
10973 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
10974 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
10975 }
10976 case NEON::BI__builtin_neon_vceqs_f32:
10977 case NEON::BI__builtin_neon_vcles_f32:
10978 case NEON::BI__builtin_neon_vclts_f32:
10979 case NEON::BI__builtin_neon_vcges_f32:
10980 case NEON::BI__builtin_neon_vcgts_f32: {
10981 llvm::CmpInst::Predicate P;
10982 switch (BuiltinID) {
10983 default: llvm_unreachable("missing builtin ID in switch!");
10984 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
10985 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
10986 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
10987 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
10988 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
10989 }
10990 Ops.push_back(EmitScalarExpr(E->getArg(1)));
10991 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
10992 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
10993 if (P == llvm::FCmpInst::FCMP_OEQ)
10994 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
10995 else
10996 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
10997 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
10998 }
10999 case NEON::BI__builtin_neon_vceqh_f16:
11000 case NEON::BI__builtin_neon_vcleh_f16:
11001 case NEON::BI__builtin_neon_vclth_f16:
11002 case NEON::BI__builtin_neon_vcgeh_f16:
11003 case NEON::BI__builtin_neon_vcgth_f16: {
11004 llvm::CmpInst::Predicate P;
11005 switch (BuiltinID) {
11006 default: llvm_unreachable("missing builtin ID in switch!");
11007 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
11008 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
11009 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
11010 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
11011 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
11012 }
11013 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11014 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
11015 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
11016 if (P == llvm::FCmpInst::FCMP_OEQ)
11017 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11018 else
11019 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11020 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
11021 }
11022 case NEON::BI__builtin_neon_vceqd_s64:
11023 case NEON::BI__builtin_neon_vceqd_u64:
11024 case NEON::BI__builtin_neon_vcgtd_s64:
11025 case NEON::BI__builtin_neon_vcgtd_u64:
11026 case NEON::BI__builtin_neon_vcltd_s64:
11027 case NEON::BI__builtin_neon_vcltd_u64:
11028 case NEON::BI__builtin_neon_vcged_u64:
11029 case NEON::BI__builtin_neon_vcged_s64:
11030 case NEON::BI__builtin_neon_vcled_u64:
11031 case NEON::BI__builtin_neon_vcled_s64: {
11032 llvm::CmpInst::Predicate P;
11033 switch (BuiltinID) {
11034 default: llvm_unreachable("missing builtin ID in switch!");
11035 case NEON::BI__builtin_neon_vceqd_s64:
11036 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
11037 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
11038 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
11039 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
11040 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
11041 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
11042 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
11043 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
11044 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
11045 }
11046 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11047 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
11048 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
11049 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
11050 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
11051 }
11052 case NEON::BI__builtin_neon_vtstd_s64:
11053 case NEON::BI__builtin_neon_vtstd_u64: {
11054 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11055 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
11056 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
11057 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
11058 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
11059 llvm::Constant::getNullValue(Int64Ty));
11060 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
11061 }
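// vtstd is a bit-test: AND the operands, compare against zero, sign-extend.
// Conceptually: vtstd_u64(a, b) == ((a & b) != 0) ? ~0ULL : 0ULL.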
11062 case NEON::BI__builtin_neon_vset_lane_i8:
11063 case NEON::BI__builtin_neon_vset_lane_i16:
11064 case NEON::BI__builtin_neon_vset_lane_i32:
11065 case NEON::BI__builtin_neon_vset_lane_i64:
11066 case NEON::BI__builtin_neon_vset_lane_bf16:
11067 case NEON::BI__builtin_neon_vset_lane_f32:
11068 case NEON::BI__builtin_neon_vsetq_lane_i8:
11069 case NEON::BI__builtin_neon_vsetq_lane_i16:
11070 case NEON::BI__builtin_neon_vsetq_lane_i32:
11071 case NEON::BI__builtin_neon_vsetq_lane_i64:
11072 case NEON::BI__builtin_neon_vsetq_lane_bf16:
11073 case NEON::BI__builtin_neon_vsetq_lane_f32:
11074 Ops.push_back(EmitScalarExpr(E->getArg(2)));
11075 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
11076 case NEON::BI__builtin_neon_vset_lane_f64:
11077 // The vector type needs a cast for the v1f64 variant.
11078 Ops[1] =
11079 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
11080 Ops.push_back(EmitScalarExpr(E->getArg(2)));
11081 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
11082 case NEON::BI__builtin_neon_vsetq_lane_f64:
11083 // The vector type needs a cast for the v2f64 variant.
11084 Ops[1] =
11085 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
11086 Ops.push_back(EmitScalarExpr(E->getArg(2)));
11087 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
11088
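// Each vset_lane form above is a single insertelement; only the f64 variants
// need the preceding bitcast so the operand is viewed as a <1 x double> or
// <2 x double> before the lane is written. Conceptually, vsetq_lane_f64 is:
//   %vec = bitcast <2 x i64> %v to <2 x double>
//   %res = insertelement <2 x double> %vec, double %s, i64 %lane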
11089 case NEON::BI__builtin_neon_vget_lane_i8:
11090 case NEON::BI__builtin_neon_vdupb_lane_i8:
11091 Ops[0] =
11092 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
11093 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11094 "vget_lane");
11095 case NEON::BI__builtin_neon_vgetq_lane_i8:
11096 case NEON::BI__builtin_neon_vdupb_laneq_i8:
11097 Ops[0] =
11098 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
11099 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11100 "vgetq_lane");
11101 case NEON::BI__builtin_neon_vget_lane_i16:
11102 case NEON::BI__builtin_neon_vduph_lane_i16:
11103 Ops[0] =
11104 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
11105 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11106 "vget_lane");
11107 case NEON::BI__builtin_neon_vgetq_lane_i16:
11108 case NEON::BI__builtin_neon_vduph_laneq_i16:
11109 Ops[0] =
11110 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
11111 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11112 "vgetq_lane");
11113 case NEON::BI__builtin_neon_vget_lane_i32:
11114 case NEON::BI__builtin_neon_vdups_lane_i32:
11115 Ops[0] =
11116 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
11117 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11118 "vget_lane");
11119 case NEON::BI__builtin_neon_vdups_lane_f32:
11120 Ops[0] =
11121 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
11122 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11123 "vdups_lane");
11124 case NEON::BI__builtin_neon_vgetq_lane_i32:
11125 case NEON::BI__builtin_neon_vdups_laneq_i32:
11126 Ops[0] =
11127 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
11128 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11129 "vgetq_lane");
11130 case NEON::BI__builtin_neon_vget_lane_i64:
11131 case NEON::BI__builtin_neon_vdupd_lane_i64:
11132 Ops[0] =
11133 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
11134 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11135 "vget_lane");
11136 case NEON::BI__builtin_neon_vdupd_lane_f64:
11137 Ops[0] =
11138 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
11139 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11140 "vdupd_lane");
11141 case NEON::BI__builtin_neon_vgetq_lane_i64:
11142 case NEON::BI__builtin_neon_vdupd_laneq_i64:
11143 Ops[0] =
11144 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
11145 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11146 "vgetq_lane");
11147 case NEON::BI__builtin_neon_vget_lane_f32:
11148 Ops[0] =
11149 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
11150 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11151 "vget_lane");
11152 case NEON::BI__builtin_neon_vget_lane_f64:
11153 Ops[0] =
11154 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
11155 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11156 "vget_lane");
11157 case NEON::BI__builtin_neon_vgetq_lane_f32:
11158 case NEON::BI__builtin_neon_vdups_laneq_f32:
11159 Ops[0] =
11160 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
11161 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11162 "vgetq_lane");
11163 case NEON::BI__builtin_neon_vgetq_lane_f64:
11164 case NEON::BI__builtin_neon_vdupd_laneq_f64:
11165 Ops[0] =
11166 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
11167 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11168 "vgetq_lane");
11169 case NEON::BI__builtin_neon_vaddh_f16:
11170 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11171 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
11172 case NEON::BI__builtin_neon_vsubh_f16:
11173 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11174 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
11175 case NEON::BI__builtin_neon_vmulh_f16:
11176 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11177 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
11178 case NEON::BI__builtin_neon_vdivh_f16:
11179 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11180 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
11181 case NEON::BI__builtin_neon_vfmah_f16:
11182 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
11183 return emitCallMaybeConstrainedFPBuiltin(
11184 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
11185 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
11186 case NEON::BI__builtin_neon_vfmsh_f16: {
11187 Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
11188
11189 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
11190 return emitCallMaybeConstrainedFPBuiltin(
11191 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
11192 {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
11193 }
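// vfmsh_f16 has no intrinsic of its own: it is fma with a negated
// multiplicand, i.e. vfmsh(a, b, c) == fma(-b, c, a). Note the rotation in
// both cases: the NEON builtins pass the accumulator as argument 0, while
// llvm.fma takes it as the last operand.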
11194 case NEON::BI__builtin_neon_vaddd_s64:
11195 case NEON::BI__builtin_neon_vaddd_u64:
11196 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
11197 case NEON::BI__builtin_neon_vsubd_s64:
11198 case NEON::BI__builtin_neon_vsubd_u64:
11199 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
11200 case NEON::BI__builtin_neon_vqdmlalh_s16:
11201 case NEON::BI__builtin_neon_vqdmlslh_s16: {
11202 SmallVector<Value *, 2> ProductOps;
11203 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
11204 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
11205 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
11206 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
11207 ProductOps, "vqdmlXl");
11208 Constant *CI = ConstantInt::get(SizeTy, 0);
11209 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
11210
11211 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
11212 ? Intrinsic::aarch64_neon_sqadd
11213 : Intrinsic::aarch64_neon_sqsub;
11214 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
11215 }
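// There is no scalar 16-bit sqdmull, so vectorWrapScalar16 widens each i16
// operand into a <4 x i16>, the vector aarch64.neon.sqdmull produces a
// <4 x i32>, lane 0 is extracted, and the result is accumulated into Ops[0]
// with the saturating sqadd/sqsub.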
11216 case NEON::BI__builtin_neon_vqshlud_n_s64: {
11217 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11218 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
11219 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
11220 Ops, "vqshlu_n");
11221 }
11222 case NEON::BI__builtin_neon_vqshld_n_u64:
11223 case NEON::BI__builtin_neon_vqshld_n_s64: {
11224 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
11225 ? Intrinsic::aarch64_neon_uqshl
11226 : Intrinsic::aarch64_neon_sqshl;
11227 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11228 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
11229 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
11230 }
11231 case NEON::BI__builtin_neon_vrshrd_n_u64:
11232 case NEON::BI__builtin_neon_vrshrd_n_s64: {
11233 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
11234 ? Intrinsic::aarch64_neon_urshl
11235 : Intrinsic::aarch64_neon_srshl;
11236 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11237 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
11238 Ops[1] = ConstantInt::get(Int64Ty, -SV);
11239 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
11240 }
11241 case NEON::BI__builtin_neon_vrsrad_n_u64:
11242 case NEON::BI__builtin_neon_vrsrad_n_s64: {
11243 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
11244 ? Intrinsic::aarch64_neon_urshl
11245 : Intrinsic::aarch64_neon_srshl;
11246 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
11247 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
11248 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
11249 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
11250 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
11251 }
11252 case NEON::BI__builtin_neon_vshld_n_s64:
11253 case NEON::BI__builtin_neon_vshld_n_u64: {
11254 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
11255 return Builder.CreateShl(
11256 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
11257 }
11258 case NEON::BI__builtin_neon_vshrd_n_s64: {
11259 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
11260 return Builder.CreateAShr(
11261 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
11262 Amt->getZExtValue())),
11263 "shrd_n");
11264 }
11265 case NEON::BI__builtin_neon_vshrd_n_u64: {
11266 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
11267 uint64_t ShiftAmt = Amt->getZExtValue();
11268 // Right-shifting an unsigned value by its size yields 0.
11269 if (ShiftAmt == 64)
11270 return ConstantInt::get(Int64Ty, 0);
11271 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
11272 "shrd_n");
11273 }
11274 case NEON::BI__builtin_neon_vsrad_n_s64: {
11275 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
11276 Ops[1] = Builder.CreateAShr(
11277 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
11278 Amt->getZExtValue())),
11279 "shrd_n");
11280 return Builder.CreateAdd(Ops[0], Ops[1]);
11281 }
11282 case NEON::BI__builtin_neon_vsrad_n_u64: {
11283 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
11284 uint64_t ShiftAmt = Amt->getZExtValue();
11285 // Right-shifting an unsigned value by its size yields 0.
11286 // As Op + 0 = Op, return Ops[0] directly.
11287 if (ShiftAmt == 64)
11288 return Ops[0];
11289 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
11290 "shrd_n");
11291 return Builder.CreateAdd(Ops[0], Ops[1]);
11292 }
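// The shift-amount special cases above exist because IR shifts by >= the bit
// width are poison, while these builtins accept n in 1..64. Signed forms
// clamp the arithmetic shift to 63 (same result: pure sign fill); unsigned
// forms fold a shift by 64 to zero. Conceptually:
//   vsrad_n_u64(acc, x, 64) == acc + (x >> 64) == acc + 0 == acc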
11293 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
11294 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
11295 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
11296 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
11297 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
11298 "lane");
11299 SmallVector<Value *, 2> ProductOps;
11300 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
11301 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
11302 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
11303 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
11304 ProductOps, "vqdmlXl");
11305 Constant *CI = ConstantInt::get(SizeTy, 0);
11306 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
11307 Ops.pop_back();
11308
11309 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
11310 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
11311 ? Intrinsic::aarch64_neon_sqadd
11312 : Intrinsic::aarch64_neon_sqsub;
11313 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
11314 }
11315 case NEON::BI__builtin_neon_vqdmlals_s32:
11316 case NEON::BI__builtin_neon_vqdmlsls_s32: {
11317 SmallVector<Value *, 2> ProductOps;
11318 ProductOps.push_back(Ops[1]);
11319 ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
11320 Ops[1] =
11321 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
11322 ProductOps, "vqdmlXl");
11323
11324 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
11325 ? Intrinsic::aarch64_neon_sqadd
11326 : Intrinsic::aarch64_neon_sqsub;
11327 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
11328 }
11329 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
11330 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
11331 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
11332 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
11333 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
11334 "lane");
11335 SmallVector<Value *, 2> ProductOps;
11336 ProductOps.push_back(Ops[1]);
11337 ProductOps.push_back(Ops[2]);
11338 Ops[1] =
11339 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
11340 ProductOps, "vqdmlXl");
11341 Ops.pop_back();
11342
11343 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
11344 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
11345 ? Intrinsic::aarch64_neon_sqadd
11346 : Intrinsic::aarch64_neon_sqsub;
11347 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
11348 }
11349 case NEON::BI__builtin_neon_vget_lane_bf16:
11350 case NEON::BI__builtin_neon_vduph_lane_bf16:
11351 case NEON::BI__builtin_neon_vduph_lane_f16: {
11352 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11353 "vget_lane");
11354 }
11355 case NEON::BI__builtin_neon_vgetq_lane_bf16:
11356 case NEON::BI__builtin_neon_vduph_laneq_bf16:
11357 case NEON::BI__builtin_neon_vduph_laneq_f16: {
11358 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11359 "vgetq_lane");
11360 }
11361
11362 case clang::AArch64::BI_InterlockedAdd: {
11363 Value *Arg0 = EmitScalarExpr(E->getArg(0));
11364 Value *Arg1 = EmitScalarExpr(E->getArg(1));
11365 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
11366 AtomicRMWInst::Add, Arg0, Arg1,
11367 llvm::AtomicOrdering::SequentiallyConsistent);
11368 return Builder.CreateAdd(RMWI, Arg1);
11369 }
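// atomicrmw yields the value held *before* the addition, but MSVC's
// _InterlockedAdd is specified to return the *new* value, hence the extra
// CreateAdd on the way out. Sketch of the emitted IR:
//   %old = atomicrmw add ptr %p, i32 %v seq_cst
//   %new = add i32 %old, %v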
11370 }
11371
11372 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
11373 llvm::Type *Ty = VTy;
11374 if (!Ty)
11375 return nullptr;
11376
11377 // Not all intrinsics handled by the common case work for AArch64 yet, so only
11378 // defer to common code if it's been added to our special map.
11379 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
11380 AArch64SIMDIntrinsicsProvenSorted);
11381
11382 if (Builtin)
11383 return EmitCommonNeonBuiltinExpr(
11384 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
11385 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
11386 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
11387
11388 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
11389 return V;
11390
11391 unsigned Int;
11392 switch (BuiltinID) {
11393 default: return nullptr;
11394 case NEON::BI__builtin_neon_vbsl_v:
11395 case NEON::BI__builtin_neon_vbslq_v: {
11396 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
11397 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
11398 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
11399 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
11400
11401 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
11402 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
11403 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
11404 return Builder.CreateBitCast(Ops[0], Ty);
11405 }
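// vbsl is a pure bitwise select computed on the integer view of the vectors:
//   result = (mask & Ops[1]) | (~mask & Ops[2])
// Every *bit* of Ops[0] picks between the corresponding bits of the other two
// operands, which is why all three are first bitcast to getInteger(VTy).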
11406 case NEON::BI__builtin_neon_vfma_lane_v:
11407 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
11408 // The ARM builtins (and instructions) have the addend as the first
11409 // operand, but the 'fma' intrinsics have it last. Swap it around here.
11410 Value *Addend = Ops[0];
11411 Value *Multiplicand = Ops[1];
11412 Value *LaneSource = Ops[2];
11413 Ops[0] = Multiplicand;
11414 Ops[1] = LaneSource;
11415 Ops[2] = Addend;
11416
11417 // Now adjust things to handle the lane access.
11418 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
11419 ? llvm::FixedVectorType::get(VTy->getElementType(),
11420 VTy->getNumElements() / 2)
11421 : VTy;
11422 llvm::Constant *cst = cast<Constant>(Ops[3]);
11423 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
11424 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
11425 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
11426
11427 Ops.pop_back();
11428 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
11429 : Intrinsic::fma;
11430 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
11431 }
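// The lane is broadcast with a shufflevector whose mask is a constant splat
// of the lane index; the (possibly constrained) llvm.fma then does the rest.
// For vfmaq_lane the lane source is a 64-bit vector, hence SourceTy with half
// the element count of the 128-bit result type.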
11432 case NEON::BI__builtin_neon_vfma_laneq_v: {
11433 auto *VTy = cast<llvm::FixedVectorType>(Ty);
11434 // v1f64 fma should be mapped to Neon scalar f64 fma
11435 if (VTy && VTy->getElementType() == DoubleTy) {
11436 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
11437 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
11438 llvm::FixedVectorType *VTy =
11439 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
11440 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
11441 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
11442 Value *Result;
11443 Result = emitCallMaybeConstrainedFPBuiltin(
11444 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
11445 DoubleTy, {Ops[1], Ops[2], Ops[0]});
11446 return Builder.CreateBitCast(Result, Ty);
11447 }
11448 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
11449 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
11450
11451 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
11452 VTy->getNumElements() * 2);
11453 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
11454 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
11455 cast<ConstantInt>(Ops[3]));
11456 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
11457
11458 return emitCallMaybeConstrainedFPBuiltin(
11459 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
11460 {Ops[2], Ops[1], Ops[0]});
11461 }
11462 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
11463 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
11464 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
11465
11466 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
11467 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
11468 return emitCallMaybeConstrainedFPBuiltin(
11469 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
11470 {Ops[2], Ops[1], Ops[0]});
11471 }
11472 case NEON::BI__builtin_neon_vfmah_lane_f16:
11473 case NEON::BI__builtin_neon_vfmas_lane_f32:
11474 case NEON::BI__builtin_neon_vfmah_laneq_f16:
11475 case NEON::BI__builtin_neon_vfmas_laneq_f32:
11476 case NEON::BI__builtin_neon_vfmad_lane_f64:
11477 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
11478 Ops.push_back(EmitScalarExpr(E->getArg(3)));
11479 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
11480 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
11481 return emitCallMaybeConstrainedFPBuiltin(
11482 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
11483 {Ops[1], Ops[2], Ops[0]});
11484 }
11485 case NEON::BI__builtin_neon_vmull_v:
11486 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
11487 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
11488 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
11489 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
11490 case NEON::BI__builtin_neon_vmax_v:
11491 case NEON::BI__builtin_neon_vmaxq_v:
11492 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
11493 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
11494 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
11495 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
11496 case NEON::BI__builtin_neon_vmaxh_f16: {
11497 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11498 Int = Intrinsic::aarch64_neon_fmax;
11499 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
11500 }
11501 case NEON::BI__builtin_neon_vmin_v:
11502 case NEON::BI__builtin_neon_vminq_v:
11503 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
11504 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
11505 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
11506 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
11507 case NEON::BI__builtin_neon_vminh_f16: {
11508 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11509 Int = Intrinsic::aarch64_neon_fmin;
11510 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
11511 }
11512 case NEON::BI__builtin_neon_vabd_v:
11513 case NEON::BI__builtin_neon_vabdq_v:
11514 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
11515 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
11516 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
11517 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
11518 case NEON::BI__builtin_neon_vpadal_v:
11519 case NEON::BI__builtin_neon_vpadalq_v: {
11520 unsigned ArgElts = VTy->getNumElements();
11521 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
11522 unsigned BitWidth = EltTy->getBitWidth();
11523 auto *ArgTy = llvm::FixedVectorType::get(
11524 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
11525 llvm::Type* Tys[2] = { VTy, ArgTy };
11526 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
11527 llvm::SmallVector<llvm::Value*, 1> TmpOps;
11528 TmpOps.push_back(Ops[1]);
11529 Function *F = CGM.getIntrinsic(Int, Tys);
11530 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
11531 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
11532 return Builder.CreateAdd(tmp, addend);
11533 }
11534 case NEON::BI__builtin_neon_vpmin_v:
11535 case NEON::BI__builtin_neon_vpminq_v:
11536 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
11537 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
11538 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
11539 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
11540 case NEON::BI__builtin_neon_vpmax_v:
11541 case NEON::BI__builtin_neon_vpmaxq_v:
11542 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
11543 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
11544 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
11545 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
11546 case NEON::BI__builtin_neon_vminnm_v:
11547 case NEON::BI__builtin_neon_vminnmq_v:
11548 Int = Intrinsic::aarch64_neon_fminnm;
11549 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
11550 case NEON::BI__builtin_neon_vminnmh_f16:
11551 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11552 Int = Intrinsic::aarch64_neon_fminnm;
11553 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
11554 case NEON::BI__builtin_neon_vmaxnm_v:
11555 case NEON::BI__builtin_neon_vmaxnmq_v:
11556 Int = Intrinsic::aarch64_neon_fmaxnm;
11557 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
11558 case NEON::BI__builtin_neon_vmaxnmh_f16:
11559 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11560 Int = Intrinsic::aarch64_neon_fmaxnm;
11561 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
11562 case NEON::BI__builtin_neon_vrecpss_f32: {
11563 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11564 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
11565 Ops, "vrecps");
11566 }
11567 case NEON::BI__builtin_neon_vrecpsd_f64:
11568 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11569 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
11570 Ops, "vrecps");
11571 case NEON::BI__builtin_neon_vrecpsh_f16:
11572 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11573 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
11574 Ops, "vrecps");
11575 case NEON::BI__builtin_neon_vqshrun_n_v:
11576 Int = Intrinsic::aarch64_neon_sqshrun;
11577 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
11578 case NEON::BI__builtin_neon_vqrshrun_n_v:
11579 Int = Intrinsic::aarch64_neon_sqrshrun;
11580 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
11581 case NEON::BI__builtin_neon_vqshrn_n_v:
11582 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
11583 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
11584 case NEON::BI__builtin_neon_vrshrn_n_v:
11585 Int = Intrinsic::aarch64_neon_rshrn;
11586 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
11587 case NEON::BI__builtin_neon_vqrshrn_n_v:
11588 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
11589 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
11590 case NEON::BI__builtin_neon_vrndah_f16: {
11591 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11592 Int = Builder.getIsFPConstrained()
11593 ? Intrinsic::experimental_constrained_round
11594 : Intrinsic::round;
11595 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
11596 }
11597 case NEON::BI__builtin_neon_vrnda_v:
11598 case NEON::BI__builtin_neon_vrndaq_v: {
11599 Int = Builder.getIsFPConstrained()
11600 ? Intrinsic::experimental_constrained_round
11601 : Intrinsic::round;
11602 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
11603 }
11604 case NEON::BI__builtin_neon_vrndih_f16: {
11605 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11606 Int = Builder.getIsFPConstrained()
11607 ? Intrinsic::experimental_constrained_nearbyint
11608 : Intrinsic::nearbyint;
11609 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
11610 }
11611 case NEON::BI__builtin_neon_vrndmh_f16: {
11612 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11613 Int = Builder.getIsFPConstrained()
11614 ? Intrinsic::experimental_constrained_floor
11615 : Intrinsic::floor;
11616 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
11617 }
11618 case NEON::BI__builtin_neon_vrndm_v:
11619 case NEON::BI__builtin_neon_vrndmq_v: {
11620 Int = Builder.getIsFPConstrained()
11621 ? Intrinsic::experimental_constrained_floor
11622 : Intrinsic::floor;
11623 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
11624 }
11625 case NEON::BI__builtin_neon_vrndnh_f16: {
11626 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11627 Int = Builder.getIsFPConstrained()
11628 ? Intrinsic::experimental_constrained_roundeven
11629 : Intrinsic::roundeven;
11630 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
11631 }
11632 case NEON::BI__builtin_neon_vrndn_v:
11633 case NEON::BI__builtin_neon_vrndnq_v: {
11634 Int = Builder.getIsFPConstrained()
11635 ? Intrinsic::experimental_constrained_roundeven
11636 : Intrinsic::roundeven;
11637 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
11638 }
11639 case NEON::BI__builtin_neon_vrndns_f32: {
11640 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11641 Int = Builder.getIsFPConstrained()
11642 ? Intrinsic::experimental_constrained_roundeven
11643 : Intrinsic::roundeven;
11644 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
11645 }
11646 case NEON::BI__builtin_neon_vrndph_f16: {
11647 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11648 Int = Builder.getIsFPConstrained()
11649 ? Intrinsic::experimental_constrained_ceil
11650 : Intrinsic::ceil;
11651 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
11652 }
11653 case NEON::BI__builtin_neon_vrndp_v:
11654 case NEON::BI__builtin_neon_vrndpq_v: {
11655 Int = Builder.getIsFPConstrained()
11656 ? Intrinsic::experimental_constrained_ceil
11657 : Intrinsic::ceil;
11658 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
11659 }
11660 case NEON::BI__builtin_neon_vrndxh_f16: {
11661 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11662 Int = Builder.getIsFPConstrained()
11663 ? Intrinsic::experimental_constrained_rint
11664 : Intrinsic::rint;
11665 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
11666 }
11667 case NEON::BI__builtin_neon_vrndx_v:
11668 case NEON::BI__builtin_neon_vrndxq_v: {
11669 Int = Builder.getIsFPConstrained()
11670 ? Intrinsic::experimental_constrained_rint
11671 : Intrinsic::rint;
11672 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
11673 }
11674 case NEON::BI__builtin_neon_vrndh_f16: {
11675 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11676 Int = Builder.getIsFPConstrained()
11677 ? Intrinsic::experimental_constrained_trunc
11678 : Intrinsic::trunc;
11679 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
11680 }
11681 case NEON::BI__builtin_neon_vrnd32x_f32:
11682 case NEON::BI__builtin_neon_vrnd32xq_f32: {
11683 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11684 Int = Intrinsic::aarch64_neon_frint32x;
11685 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
11686 }
11687 case NEON::BI__builtin_neon_vrnd32z_f32:
11688 case NEON::BI__builtin_neon_vrnd32zq_f32: {
11689 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11690 Int = Intrinsic::aarch64_neon_frint32z;
11691 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
11692 }
11693 case NEON::BI__builtin_neon_vrnd64x_f32:
11694 case NEON::BI__builtin_neon_vrnd64xq_f32: {
11695 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11696 Int = Intrinsic::aarch64_neon_frint64x;
11697 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
11698 }
11699 case NEON::BI__builtin_neon_vrnd64z_f32:
11700 case NEON::BI__builtin_neon_vrnd64zq_f32: {
11701 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11702 Int = Intrinsic::aarch64_neon_frint64z;
11703 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
11704 }
11705 case NEON::BI__builtin_neon_vrnd_v:
11706 case NEON::BI__builtin_neon_vrndq_v: {
11707 Int = Builder.getIsFPConstrained()
11708 ? Intrinsic::experimental_constrained_trunc
11709 : Intrinsic::trunc;
11710 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
11711 }
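// Summary of the rounding lowerings above: vrnda -> llvm.round (ties away
// from zero), vrndi -> llvm.nearbyint, vrndm -> llvm.floor, vrndn ->
// llvm.roundeven (ties to even), vrndp -> llvm.ceil, vrndx -> llvm.rint,
// vrnd -> llvm.trunc; each switches to its experimental_constrained_* twin
// under strict FP. The frint32/frint64 forms have no generic equivalent and
// stay on target-specific aarch64.neon.frint* intrinsics.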
11712 case NEON::BI__builtin_neon_vcvt_f64_v:
11713 case NEON::BI__builtin_neon_vcvtq_f64_v:
11714 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
11715 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
11716 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
11717 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
11718 case NEON::BI__builtin_neon_vcvt_f64_f32: {
11719 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
11720 "unexpected vcvt_f64_f32 builtin");
11721 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
11722 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
11723
11724 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
11725 }
11726 case NEON::BI__builtin_neon_vcvt_f32_f64: {
11727 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
11728 "unexpected vcvt_f32_f64 builtin");
11729 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
11730 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
11731
11732 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
11733 }
11734 case NEON::BI__builtin_neon_vcvt_s32_v:
11735 case NEON::BI__builtin_neon_vcvt_u32_v:
11736 case NEON::BI__builtin_neon_vcvt_s64_v:
11737 case NEON::BI__builtin_neon_vcvt_u64_v:
11738 case NEON::BI__builtin_neon_vcvt_s16_f16:
11739 case NEON::BI__builtin_neon_vcvt_u16_f16:
11740 case NEON::BI__builtin_neon_vcvtq_s32_v:
11741 case NEON::BI__builtin_neon_vcvtq_u32_v:
11742 case NEON::BI__builtin_neon_vcvtq_s64_v:
11743 case NEON::BI__builtin_neon_vcvtq_u64_v:
11744 case NEON::BI__builtin_neon_vcvtq_s16_f16:
11745 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
11746 Int =
11747 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
11748 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
11749 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
11750 }
11751 case NEON::BI__builtin_neon_vcvta_s16_f16:
11752 case NEON::BI__builtin_neon_vcvta_u16_f16:
11753 case NEON::BI__builtin_neon_vcvta_s32_v:
11754 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
11755 case NEON::BI__builtin_neon_vcvtaq_s32_v:
11756 case NEON::BI__builtin_neon_vcvta_u32_v:
11757 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
11758 case NEON::BI__builtin_neon_vcvtaq_u32_v:
11759 case NEON::BI__builtin_neon_vcvta_s64_v:
11760 case NEON::BI__builtin_neon_vcvtaq_s64_v:
11761 case NEON::BI__builtin_neon_vcvta_u64_v:
11762 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
11763 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
11764 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
11765 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
11766 }
11767 case NEON::BI__builtin_neon_vcvtm_s16_f16:
11768 case NEON::BI__builtin_neon_vcvtm_s32_v:
11769 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
11770 case NEON::BI__builtin_neon_vcvtmq_s32_v:
11771 case NEON::BI__builtin_neon_vcvtm_u16_f16:
11772 case NEON::BI__builtin_neon_vcvtm_u32_v:
11773 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
11774 case NEON::BI__builtin_neon_vcvtmq_u32_v:
11775 case NEON::BI__builtin_neon_vcvtm_s64_v:
11776 case NEON::BI__builtin_neon_vcvtmq_s64_v:
11777 case NEON::BI__builtin_neon_vcvtm_u64_v:
11778 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
11779 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
11780 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
11781 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
11782 }
11783 case NEON::BI__builtin_neon_vcvtn_s16_f16:
11784 case NEON::BI__builtin_neon_vcvtn_s32_v:
11785 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
11786 case NEON::BI__builtin_neon_vcvtnq_s32_v:
11787 case NEON::BI__builtin_neon_vcvtn_u16_f16:
11788 case NEON::BI__builtin_neon_vcvtn_u32_v:
11789 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
11790 case NEON::BI__builtin_neon_vcvtnq_u32_v:
11791 case NEON::BI__builtin_neon_vcvtn_s64_v:
11792 case NEON::BI__builtin_neon_vcvtnq_s64_v:
11793 case NEON::BI__builtin_neon_vcvtn_u64_v:
11794 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
11795 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
11796 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
11797 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
11798 }
11799 case NEON::BI__builtin_neon_vcvtp_s16_f16:
11800 case NEON::BI__builtin_neon_vcvtp_s32_v:
11801 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
11802 case NEON::BI__builtin_neon_vcvtpq_s32_v:
11803 case NEON::BI__builtin_neon_vcvtp_u16_f16:
11804 case NEON::BI__builtin_neon_vcvtp_u32_v:
11805 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
11806 case NEON::BI__builtin_neon_vcvtpq_u32_v:
11807 case NEON::BI__builtin_neon_vcvtp_s64_v:
11808 case NEON::BI__builtin_neon_vcvtpq_s64_v:
11809 case NEON::BI__builtin_neon_vcvtp_u64_v:
11810 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
11811 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
11812 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
11813 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
11814 }
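// The vcvt{a,m,n,p} families differ only in rounding mode: fcvtas/fcvtau
// round to nearest with ties away from zero, fcvtms/fcvtmu toward minus
// infinity, fcvtns/fcvtnu to nearest ties-to-even, fcvtps/fcvtpu toward plus
// infinity, while plain vcvt above uses fcvtzs/fcvtzu (toward zero). The s/u
// spelling follows the usgn flag derived from the builtin's result type.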
11815 case NEON::BI__builtin_neon_vmulx_v:
11816 case NEON::BI__builtin_neon_vmulxq_v: {
11817 Int = Intrinsic::aarch64_neon_fmulx;
11818 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
11819 }
11820 case NEON::BI__builtin_neon_vmulxh_lane_f16:
11821 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
11822 // vmulx_lane should be mapped to Neon scalar mulx after
11823 // extracting the scalar element
11824 Ops.push_back(EmitScalarExpr(E->getArg(2)));
11825 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
11826 Ops.pop_back();
11827 Int = Intrinsic::aarch64_neon_fmulx;
11828 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
11829 }
11830 case NEON::BI__builtin_neon_vmul_lane_v:
11831 case NEON::BI__builtin_neon_vmul_laneq_v: {
11832 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
11833 bool Quad = false;
11834 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
11835 Quad = true;
11836 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
11837 llvm::FixedVectorType *VTy =
11838 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
11839 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
11840 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
11841 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
11842 return Builder.CreateBitCast(Result, Ty);
11843 }
11844 case NEON::BI__builtin_neon_vnegd_s64:
11845 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
11846 case NEON::BI__builtin_neon_vnegh_f16:
11847 return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
11848 case NEON::BI__builtin_neon_vpmaxnm_v:
11849 case NEON::BI__builtin_neon_vpmaxnmq_v: {
11850 Int = Intrinsic::aarch64_neon_fmaxnmp;
11851 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
11852 }
11853 case NEON::BI__builtin_neon_vpminnm_v:
11854 case NEON::BI__builtin_neon_vpminnmq_v: {
11855 Int = Intrinsic::aarch64_neon_fminnmp;
11856 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
11857 }
11858 case NEON::BI__builtin_neon_vsqrth_f16: {
11859 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11860 Int = Builder.getIsFPConstrained()
11861 ? Intrinsic::experimental_constrained_sqrt
11862 : Intrinsic::sqrt;
11863 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
11864 }
11865 case NEON::BI__builtin_neon_vsqrt_v:
11866 case NEON::BI__builtin_neon_vsqrtq_v: {
11867 Int = Builder.getIsFPConstrained()
11868 ? Intrinsic::experimental_constrained_sqrt
11869 : Intrinsic::sqrt;
11870 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
11871 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
11872 }
11873 case NEON::BI__builtin_neon_vrbit_v:
11874 case NEON::BI__builtin_neon_vrbitq_v: {
11875 Int = Intrinsic::bitreverse;
11876 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
11877 }
11878 case NEON::BI__builtin_neon_vaddv_u8:
11879 // FIXME: These are handled by the AArch64 scalar code.
11880 usgn = true;
11881 [[fallthrough]];
11882 case NEON::BI__builtin_neon_vaddv_s8: {
11883 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
11884 Ty = Int32Ty;
11885 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
11886 llvm::Type *Tys[2] = { Ty, VTy };
11887 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11888 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
11889 return Builder.CreateTrunc(Ops[0], Int8Ty);
11890 }
11891 case NEON::BI__builtin_neon_vaddv_u16:
11892 usgn = true;
11893 [[fallthrough]];
11894 case NEON::BI__builtin_neon_vaddv_s16: {
11895 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
11896 Ty = Int32Ty;
11897 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
11898 llvm::Type *Tys[2] = { Ty, VTy };
11899 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11900 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
11901 return Builder.CreateTrunc(Ops[0], Int16Ty);
11902 }
11903 case NEON::BI__builtin_neon_vaddvq_u8:
11904 usgn = true;
11905 [[fallthrough]];
11906 case NEON::BI__builtin_neon_vaddvq_s8: {
11907 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
11908 Ty = Int32Ty;
11909 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
11910 llvm::Type *Tys[2] = { Ty, VTy };
11911 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11912 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
11913 return Builder.CreateTrunc(Ops[0], Int8Ty);
11914 }
11915 case NEON::BI__builtin_neon_vaddvq_u16:
11916 usgn = true;
11917 [[fallthrough]];
11918 case NEON::BI__builtin_neon_vaddvq_s16: {
11919 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
11920 Ty = Int32Ty;
11921 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
11922 llvm::Type *Tys[2] = { Ty, VTy };
11923 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11924 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
11925 return Builder.CreateTrunc(Ops[0], Int16Ty);
11926 }
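// These horizontal reductions all share one shape: the aarch64.neon.*addv /
// *maxv / *minv intrinsics return i32 even for i8/i16 elements, so the call
// result is truncated back to the element width. Roughly, vaddv_u8(v) is:
//   %r32 = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %v)
//   %r   = trunc i32 %r32 to i8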
11927 case NEON::BI__builtin_neon_vmaxv_u8: {
11928 Int = Intrinsic::aarch64_neon_umaxv;
11929 Ty = Int32Ty;
11930 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
11931 llvm::Type *Tys[2] = { Ty, VTy };
11932 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11933 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
11934 return Builder.CreateTrunc(Ops[0], Int8Ty);
11935 }
11936 case NEON::BI__builtin_neon_vmaxv_u16: {
11937 Int = Intrinsic::aarch64_neon_umaxv;
11938 Ty = Int32Ty;
11939 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
11940 llvm::Type *Tys[2] = { Ty, VTy };
11941 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11942 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
11943 return Builder.CreateTrunc(Ops[0], Int16Ty);
11944 }
11945 case NEON::BI__builtin_neon_vmaxvq_u8: {
11946 Int = Intrinsic::aarch64_neon_umaxv;
11947 Ty = Int32Ty;
11948 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
11949 llvm::Type *Tys[2] = { Ty, VTy };
11950 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11951 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
11952 return Builder.CreateTrunc(Ops[0], Int8Ty);
11953 }
11954 case NEON::BI__builtin_neon_vmaxvq_u16: {
11955 Int = Intrinsic::aarch64_neon_umaxv;
11956 Ty = Int32Ty;
11957 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
11958 llvm::Type *Tys[2] = { Ty, VTy };
11959 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11960 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
11961 return Builder.CreateTrunc(Ops[0], Int16Ty);
11962 }
11963 case NEON::BI__builtin_neon_vmaxv_s8: {
11964 Int = Intrinsic::aarch64_neon_smaxv;
11965 Ty = Int32Ty;
11966 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
11967 llvm::Type *Tys[2] = { Ty, VTy };
11968 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11969 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
11970 return Builder.CreateTrunc(Ops[0], Int8Ty);
11971 }
11972 case NEON::BI__builtin_neon_vmaxv_s16: {
11973 Int = Intrinsic::aarch64_neon_smaxv;
11974 Ty = Int32Ty;
11975 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
11976 llvm::Type *Tys[2] = { Ty, VTy };
11977 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11978 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
11979 return Builder.CreateTrunc(Ops[0], Int16Ty);
11980 }
11981 case NEON::BI__builtin_neon_vmaxvq_s8: {
11982 Int = Intrinsic::aarch64_neon_smaxv;
11983 Ty = Int32Ty;
11984 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
11985 llvm::Type *Tys[2] = { Ty, VTy };
11986 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11987 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
11988 return Builder.CreateTrunc(Ops[0], Int8Ty);
11989 }
11990 case NEON::BI__builtin_neon_vmaxvq_s16: {
11991 Int = Intrinsic::aarch64_neon_smaxv;
11992 Ty = Int32Ty;
11993 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
11994 llvm::Type *Tys[2] = { Ty, VTy };
11995 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11996 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
11997 return Builder.CreateTrunc(Ops[0], Int16Ty);
11998 }
11999 case NEON::BI__builtin_neon_vmaxv_f16: {
12000 Int = Intrinsic::aarch64_neon_fmaxv;
12001 Ty = HalfTy;
12002 VTy = llvm::FixedVectorType::get(HalfTy, 4);
12003 llvm::Type *Tys[2] = { Ty, VTy };
12004 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12005 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12006 return Builder.CreateTrunc(Ops[0], HalfTy);
12007 }
12008 case NEON::BI__builtin_neon_vmaxvq_f16: {
12009 Int = Intrinsic::aarch64_neon_fmaxv;
12010 Ty = HalfTy;
12011 VTy = llvm::FixedVectorType::get(HalfTy, 8);
12012 llvm::Type *Tys[2] = { Ty, VTy };
12013 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12014 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12015 return Builder.CreateTrunc(Ops[0], HalfTy);
12016 }
12017 case NEON::BI__builtin_neon_vminv_u8: {
12018 Int = Intrinsic::aarch64_neon_uminv;
12019 Ty = Int32Ty;
12020 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12021 llvm::Type *Tys[2] = { Ty, VTy };
12022 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12023 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12024 return Builder.CreateTrunc(Ops[0], Int8Ty);
12025 }
12026 case NEON::BI__builtin_neon_vminv_u16: {
12027 Int = Intrinsic::aarch64_neon_uminv;
12028 Ty = Int32Ty;
12029 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12030 llvm::Type *Tys[2] = { Ty, VTy };
12031 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12032 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12033 return Builder.CreateTrunc(Ops[0], Int16Ty);
12034 }
12035 case NEON::BI__builtin_neon_vminvq_u8: {
12036 Int = Intrinsic::aarch64_neon_uminv;
12037 Ty = Int32Ty;
12038 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12039 llvm::Type *Tys[2] = { Ty, VTy };
12040 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12041 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12042 return Builder.CreateTrunc(Ops[0], Int8Ty);
12043 }
12044 case NEON::BI__builtin_neon_vminvq_u16: {
12045 Int = Intrinsic::aarch64_neon_uminv;
12046 Ty = Int32Ty;
12047 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12048 llvm::Type *Tys[2] = { Ty, VTy };
12049 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12050 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12051 return Builder.CreateTrunc(Ops[0], Int16Ty);
12052 }
12053 case NEON::BI__builtin_neon_vminv_s8: {
12054 Int = Intrinsic::aarch64_neon_sminv;
12055 Ty = Int32Ty;
12056 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12057 llvm::Type *Tys[2] = { Ty, VTy };
12058 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12059 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12060 return Builder.CreateTrunc(Ops[0], Int8Ty);
12061 }
12062 case NEON::BI__builtin_neon_vminv_s16: {
12063 Int = Intrinsic::aarch64_neon_sminv;
12064 Ty = Int32Ty;
12065 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12066 llvm::Type *Tys[2] = { Ty, VTy };
12067 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12068 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12069 return Builder.CreateTrunc(Ops[0], Int16Ty);
12070 }
12071 case NEON::BI__builtin_neon_vminvq_s8: {
12072 Int = Intrinsic::aarch64_neon_sminv;
12073 Ty = Int32Ty;
12074 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12075 llvm::Type *Tys[2] = { Ty, VTy };
12076 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12077 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12078 return Builder.CreateTrunc(Ops[0], Int8Ty);
12079 }
12080 case NEON::BI__builtin_neon_vminvq_s16: {
12081 Int = Intrinsic::aarch64_neon_sminv;
12082 Ty = Int32Ty;
12083 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12084 llvm::Type *Tys[2] = { Ty, VTy };
12085 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12086 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12087 return Builder.CreateTrunc(Ops[0], Int16Ty);
12088 }
12089 case NEON::BI__builtin_neon_vminv_f16: {
12090 Int = Intrinsic::aarch64_neon_fminv;
12091 Ty = HalfTy;
12092 VTy = llvm::FixedVectorType::get(HalfTy, 4);
12093 llvm::Type *Tys[2] = { Ty, VTy };
12094 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12095 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12096 return Builder.CreateTrunc(Ops[0], HalfTy);
12097 }
12098 case NEON::BI__builtin_neon_vminvq_f16: {
12099 Int = Intrinsic::aarch64_neon_fminv;
12100 Ty = HalfTy;
12101 VTy = llvm::FixedVectorType::get(HalfTy, 8);
12102 llvm::Type *Tys[2] = { Ty, VTy };
12103 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12104 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12105 return Builder.CreateTrunc(Ops[0], HalfTy);
12106 }
12107 case NEON::BI__builtin_neon_vmaxnmv_f16: {
12108 Int = Intrinsic::aarch64_neon_fmaxnmv;
12109 Ty = HalfTy;
12110 VTy = llvm::FixedVectorType::get(HalfTy, 4);
12111 llvm::Type *Tys[2] = { Ty, VTy };
12112 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12113 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
12114 return Builder.CreateTrunc(Ops[0], HalfTy);
12115 }
12116 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
12117 Int = Intrinsic::aarch64_neon_fmaxnmv;
12118 Ty = HalfTy;
12119 VTy = llvm::FixedVectorType::get(HalfTy, 8);
12120 llvm::Type *Tys[2] = { Ty, VTy };
12121 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12122 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
12123 return Builder.CreateTrunc(Ops[0], HalfTy);
12124 }
12125 case NEON::BI__builtin_neon_vminnmv_f16: {
12126 Int = Intrinsic::aarch64_neon_fminnmv;
12127 Ty = HalfTy;
12128 VTy = llvm::FixedVectorType::get(HalfTy, 4);
12129 llvm::Type *Tys[2] = { Ty, VTy };
12130 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12131 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
12132 return Builder.CreateTrunc(Ops[0], HalfTy);
12133 }
12134 case NEON::BI__builtin_neon_vminnmvq_f16: {
12135 Int = Intrinsic::aarch64_neon_fminnmv;
12136 Ty = HalfTy;
12137 VTy = llvm::FixedVectorType::get(HalfTy, 8);
12138 llvm::Type *Tys[2] = { Ty, VTy };
12139 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12140 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
12141 return Builder.CreateTrunc(Ops[0], HalfTy);
12142 }
12143 case NEON::BI__builtin_neon_vmul_n_f64: {
12144 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12145 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
12146 return Builder.CreateFMul(Ops[0], RHS);
12147 }
12148 case NEON::BI__builtin_neon_vaddlv_u8: {
12149 Int = Intrinsic::aarch64_neon_uaddlv;
12150 Ty = Int32Ty;
12151 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12152 llvm::Type *Tys[2] = { Ty, VTy };
12153 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12154 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12155 return Builder.CreateTrunc(Ops[0], Int16Ty);
12156 }
12157 case NEON::BI__builtin_neon_vaddlv_u16: {
12158 Int = Intrinsic::aarch64_neon_uaddlv;
12159 Ty = Int32Ty;
12160 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12161 llvm::Type *Tys[2] = { Ty, VTy };
12162 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12163 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12164 }
12165 case NEON::BI__builtin_neon_vaddlvq_u8: {
12166 Int = Intrinsic::aarch64_neon_uaddlv;
12167 Ty = Int32Ty;
12168 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12169 llvm::Type *Tys[2] = { Ty, VTy };
12170 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12171 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12172 return Builder.CreateTrunc(Ops[0], Int16Ty);
12173 }
12174 case NEON::BI__builtin_neon_vaddlvq_u16: {
12175 Int = Intrinsic::aarch64_neon_uaddlv;
12176 Ty = Int32Ty;
12177 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12178 llvm::Type *Tys[2] = { Ty, VTy };
12179 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12180 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12181 }
12182 case NEON::BI__builtin_neon_vaddlv_s8: {
12183 Int = Intrinsic::aarch64_neon_saddlv;
12184 Ty = Int32Ty;
12185 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12186 llvm::Type *Tys[2] = { Ty, VTy };
12187 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12188 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12189 return Builder.CreateTrunc(Ops[0], Int16Ty);
12190 }
12191 case NEON::BI__builtin_neon_vaddlv_s16: {
12192 Int = Intrinsic::aarch64_neon_saddlv;
12193 Ty = Int32Ty;
12194 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12195 llvm::Type *Tys[2] = { Ty, VTy };
12196 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12197 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12198 }
12199 case NEON::BI__builtin_neon_vaddlvq_s8: {
12200 Int = Intrinsic::aarch64_neon_saddlv;
12201 Ty = Int32Ty;
12202 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12203 llvm::Type *Tys[2] = { Ty, VTy };
12204 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12205 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12206 return Builder.CreateTrunc(Ops[0], Int16Ty);
12207 }
12208 case NEON::BI__builtin_neon_vaddlvq_s16: {
12209 Int = Intrinsic::aarch64_neon_saddlv;
12210 Ty = Int32Ty;
12211 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12212 llvm::Type *Tys[2] = { Ty, VTy };
12213 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12214 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12215 }
12216 case NEON::BI__builtin_neon_vsri_n_v:
12217 case NEON::BI__builtin_neon_vsriq_n_v: {
12218 Int = Intrinsic::aarch64_neon_vsri;
12219 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
12220 return EmitNeonCall(Intrin, Ops, "vsri_n");
12221 }
12222 case NEON::BI__builtin_neon_vsli_n_v:
12223 case NEON::BI__builtin_neon_vsliq_n_v: {
12224 Int = Intrinsic::aarch64_neon_vsli;
12225 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
12226 return EmitNeonCall(Intrin, Ops, "vsli_n");
12227 }
12228 case NEON::BI__builtin_neon_vsra_n_v:
12229 case NEON::BI__builtin_neon_vsraq_n_v:
12230 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12231 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
12232 return Builder.CreateAdd(Ops[0], Ops[1]);
12233 case NEON::BI__builtin_neon_vrsra_n_v:
12234 case NEON::BI__builtin_neon_vrsraq_n_v: {
12235 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
12236 SmallVector<llvm::Value*,2> TmpOps;
12237 TmpOps.push_back(Ops[1]);
12238 TmpOps.push_back(Ops[2]);
12239 Function* F = CGM.getIntrinsic(Int, Ty);
12240 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
12241 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
12242 return Builder.CreateAdd(Ops[0], tmp);
12243 }
12244 case NEON::BI__builtin_neon_vld1_v:
12245 case NEON::BI__builtin_neon_vld1q_v: {
12246 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
12247 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
12248 }
12249 case NEON::BI__builtin_neon_vst1_v:
12250 case NEON::BI__builtin_neon_vst1q_v:
12251 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
12252 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
12253 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
12254 case NEON::BI__builtin_neon_vld1_lane_v:
12255 case NEON::BI__builtin_neon_vld1q_lane_v: {
12256 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12257 Ty = llvm::PointerType::getUnqual(VTy->getElementType());
12258 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12259 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
12260 PtrOp0.getAlignment());
12261 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
12262 }
12263 case NEON::BI__builtin_neon_vldap1_lane_s64:
12264 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
12265 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12266 Ty = llvm::PointerType::getUnqual(VTy->getElementType());
12267 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12268 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
12269 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
12270 LI->setAtomic(llvm::AtomicOrdering::Acquire);
12271 Ops[0] = LI;
12272 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
12273 }
12274 case NEON::BI__builtin_neon_vld1_dup_v:
12275 case NEON::BI__builtin_neon_vld1q_dup_v: {
12276 Value *V = PoisonValue::get(Ty);
12277 Ty = llvm::PointerType::getUnqual(VTy->getElementType());
12278 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12279 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
12280 PtrOp0.getAlignment());
12281 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
12282 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
12283 return EmitNeonSplat(Ops[0], CI);
12284 }
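// ---- Editor's note (illustrative, not part of the original source) ----
// The dup lowering above is: scalar load, insertelement into lane 0 of a
// poison vector, then EmitNeonSplat (a zero-mask shufflevector). For
// vld1q_dup_s32(p) the emitted IR is, schematically:
//   %s = load i32, ptr %p
//   %v = insertelement <4 x i32> poison, i32 %s, i64 0
//   %r = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> zeroinitializer
// -----------------------------------------------------------------------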
12285 case NEON::BI__builtin_neon_vst1_lane_v:
12286 case NEON::BI__builtin_neon_vst1q_lane_v:
12287 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12288 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
12289 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
12290 return Builder.CreateAlignedStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty),
12291 PtrOp0.getAlignment());
12292 case NEON::BI__builtin_neon_vstl1_lane_s64:
12293 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
12294 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12295 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
12296 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
12297 llvm::StoreInst *SI = Builder.CreateAlignedStore(
12298 Ops[1], Builder.CreateBitCast(Ops[0], Ty), PtrOp0.getAlignment());
12299 SI->setAtomic(llvm::AtomicOrdering::Release);
12300 return SI;
12301 }
12302 case NEON::BI__builtin_neon_vld2_v:
12303 case NEON::BI__builtin_neon_vld2q_v: {
12304 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
12305 Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
12306 llvm::Type *Tys[2] = { VTy, PTy };
12307 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
12308 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
12309 Ops[0] = Builder.CreateBitCast(Ops[0],
12310 llvm::PointerType::getUnqual(Ops[1]->getType()));
12311 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
12312 }
12313 case NEON::BI__builtin_neon_vld3_v:
12314 case NEON::BI__builtin_neon_vld3q_v: {
12315 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
12316 Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
12317 llvm::Type *Tys[2] = { VTy, PTy };
12318 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
12319 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
12320 Ops[0] = Builder.CreateBitCast(Ops[0],
12321 llvm::PointerType::getUnqual(Ops[1]->getType()));
12322 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
12323 }
12324 case NEON::BI__builtin_neon_vld4_v:
12325 case NEON::BI__builtin_neon_vld4q_v: {
12326 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
12327 Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
12328 llvm::Type *Tys[2] = { VTy, PTy };
12329 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
12330 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
12331 Ops[0] = Builder.CreateBitCast(Ops[0],
12332 llvm::PointerType::getUnqual(Ops[1]->getType()));
12333 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
12334 }
12335 case NEON::BI__builtin_neon_vld2_dup_v:
12336 case NEON::BI__builtin_neon_vld2q_dup_v: {
12337 llvm::Type *PTy =
12338 llvm::PointerType::getUnqual(VTy->getElementType());
12339 Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
12340 llvm::Type *Tys[2] = { VTy, PTy };
12341 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
12342 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
12343 Ops[0] = Builder.CreateBitCast(Ops[0],
12344 llvm::PointerType::getUnqual(Ops[1]->getType()));
12345 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
12346 }
12347 case NEON::BI__builtin_neon_vld3_dup_v:
12348 case NEON::BI__builtin_neon_vld3q_dup_v: {
12349 llvm::Type *PTy =
12350 llvm::PointerType::getUnqual(VTy->getElementType());
12351 Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
12352 llvm::Type *Tys[2] = { VTy, PTy };
12353 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
12354 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
12355 Ops[0] = Builder.CreateBitCast(Ops[0],
12356 llvm::PointerType::getUnqual(Ops[1]->getType()));
12357 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
12358 }
12359 case NEON::BI__builtin_neon_vld4_dup_v:
12360 case NEON::BI__builtin_neon_vld4q_dup_v: {
12361 llvm::Type *PTy =
12362 llvm::PointerType::getUnqual(VTy->getElementType());
12363 Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
12364 llvm::Type *Tys[2] = { VTy, PTy };
12365 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
12366 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
12367 Ops[0] = Builder.CreateBitCast(Ops[0],
12368 llvm::PointerType::getUnqual(Ops[1]->getType()));
12369 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
12370 }
12371 case NEON::BI__builtin_neon_vld2_lane_v:
12372 case NEON::BI__builtin_neon_vld2q_lane_v: {
12373 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
12374 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
12375 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
12376 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12377 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12378 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
12379 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
12380 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
12381 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12382 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
12383 }
12384 case NEON::BI__builtin_neon_vld3_lane_v:
12385 case NEON::BI__builtin_neon_vld3q_lane_v: {
12386 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
12387 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
12388 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
12389 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12390 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12391 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
12392 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
12393 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
12394 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
12395 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12396 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
12397 }
12398 case NEON::BI__builtin_neon_vld4_lane_v:
12399 case NEON::BI__builtin_neon_vld4q_lane_v: {
12400 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
12401 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
12402 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
12403 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12404 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12405 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
12406 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
12407 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
12408 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
12409 Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
12410 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12411 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
12412 }
12413 case NEON::BI__builtin_neon_vst2_v:
12414 case NEON::BI__builtin_neon_vst2q_v: {
12415 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
12416 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
12417 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
12418 Ops, "");
12419 }
12420 case NEON::BI__builtin_neon_vst2_lane_v:
12421 case NEON::BI__builtin_neon_vst2q_lane_v: {
12422 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
12423 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
12424 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
12425 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
12426 Ops, "");
12427 }
12428 case NEON::BI__builtin_neon_vst3_v:
12429 case NEON::BI__builtin_neon_vst3q_v: {
12430 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
12431 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
12432 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
12433 Ops, "");
12434 }
12435 case NEON::BI__builtin_neon_vst3_lane_v:
12436 case NEON::BI__builtin_neon_vst3q_lane_v: {
12437 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
12438 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
12439 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
12440 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
12441 Ops, "");
12442 }
12443 case NEON::BI__builtin_neon_vst4_v:
12444 case NEON::BI__builtin_neon_vst4q_v: {
12445 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
12446 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
12447 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
12448 Ops, "");
12449 }
12450 case NEON::BI__builtin_neon_vst4_lane_v:
12451 case NEON::BI__builtin_neon_vst4q_lane_v: {
12452 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
12453 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
12454 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
12455 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
12456 Ops, "");
12457 }
12458 case NEON::BI__builtin_neon_vtrn_v:
12459 case NEON::BI__builtin_neon_vtrnq_v: {
12460 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
12461 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12462 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12463 Value *SV = nullptr;
12464
12465 for (unsigned vi = 0; vi != 2; ++vi) {
12466 SmallVector<int, 16> Indices;
12467 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
12468 Indices.push_back(i+vi);
12469 Indices.push_back(i+e+vi);
12470 }
12471 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
12472 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
12473 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
12474 }
12475 return SV;
12476 }
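// ---- Editor's note (illustrative, not part of the original source) ----
// Worked example of the index loop above for e == 4 (4-element vectors):
//   vi == 0 -> Indices = {0, 4, 2, 6}   (even lanes of both inputs)
//   vi == 1 -> Indices = {1, 5, 3, 7}   (odd lanes of both inputs)
// The two shuffles are stored to consecutive vector slots through Ops[0],
// which is how the two-vector vtrn result is written back through the
// sret pointer.
// -----------------------------------------------------------------------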
12477 case NEON::BI__builtin_neon_vuzp_v:
12478 case NEON::BI__builtin_neon_vuzpq_v: {
12479 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
12480 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12481 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12482 Value *SV = nullptr;
12483
12484 for (unsigned vi = 0; vi != 2; ++vi) {
12485 SmallVector<int, 16> Indices;
12486 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
12487 Indices.push_back(2*i+vi);
12488
12489 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
12490 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
12491 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
12492 }
12493 return SV;
12494 }
12495 case NEON::BI__builtin_neon_vzip_v:
12496 case NEON::BI__builtin_neon_vzipq_v: {
12497 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
12498 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12499 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12500 Value *SV = nullptr;
12501
12502 for (unsigned vi = 0; vi != 2; ++vi) {
12503 SmallVector<int, 16> Indices;
12504 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
12505 Indices.push_back((i + vi*e) >> 1);
12506 Indices.push_back(((i + vi*e) >> 1)+e);
12507 }
12508 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
12509 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
12510 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
12511 }
12512 return SV;
12513 }
12514 case NEON::BI__builtin_neon_vqtbl1q_v: {
12515 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
12516 Ops, "vtbl1");
12517 }
12518 case NEON::BI__builtin_neon_vqtbl2q_v: {
12519 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
12520 Ops, "vtbl2");
12521 }
12522 case NEON::BI__builtin_neon_vqtbl3q_v: {
12523 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
12524 Ops, "vtbl3");
12525 }
12526 case NEON::BI__builtin_neon_vqtbl4q_v: {
12527 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
12528 Ops, "vtbl4");
12529 }
12530 case NEON::BI__builtin_neon_vqtbx1q_v: {
12531 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
12532 Ops, "vtbx1");
12533 }
12534 case NEON::BI__builtin_neon_vqtbx2q_v: {
12535 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
12536 Ops, "vtbx2");
12537 }
12538 case NEON::BI__builtin_neon_vqtbx3q_v: {
12539 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
12540 Ops, "vtbx3");
12541 }
12542 case NEON::BI__builtin_neon_vqtbx4q_v: {
12543 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
12544 Ops, "vtbx4");
12545 }
12546 case NEON::BI__builtin_neon_vsqadd_v:
12547 case NEON::BI__builtin_neon_vsqaddq_v: {
12548 Int = Intrinsic::aarch64_neon_usqadd;
12549 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
12550 }
12551 case NEON::BI__builtin_neon_vuqadd_v:
12552 case NEON::BI__builtin_neon_vuqaddq_v: {
12553 Int = Intrinsic::aarch64_neon_suqadd;
12554 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
12555 }
12556 }
12557}
12558
12559Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
12560 const CallExpr *E) {
12561 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
12562 BuiltinID == BPF::BI__builtin_btf_type_id ||
12563 BuiltinID == BPF::BI__builtin_preserve_type_info ||
12564 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
12565 "unexpected BPF builtin");
12566
12567 // A sequence number, injected into IR builtin functions, to
12568 // prevent CSE when the only difference between two otherwise
12569 // identical calls may be the debuginfo metadata.
12570 static uint32_t BuiltinSeqNum;
12571
12572 switch (BuiltinID) {
12573 default:
12574 llvm_unreachable("Unexpected BPF builtin");
12575 case BPF::BI__builtin_preserve_field_info: {
12576 const Expr *Arg = E->getArg(0);
12577 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
12578
12579 if (!getDebugInfo()) {
12580 CGM.Error(E->getExprLoc(),
12581 "using __builtin_preserve_field_info() without -g");
12582 return IsBitField ? EmitLValue(Arg).getBitFieldPointer()
12583 : EmitLValue(Arg).getPointer(*this);
12584 }
12585
12586 // Enable underlying preserve_*_access_index() generation.
12587 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
12588 IsInPreservedAIRegion = true;
12589 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getBitFieldPointer()
12590 : EmitLValue(Arg).getPointer(*this);
12591 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
12592
12593 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12594 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
12595
12596 // Build the IR for the preserve_field_info intrinsic.
12597 llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration(
12598 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
12599 {FieldAddr->getType()});
12600 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
12601 }
12602 case BPF::BI__builtin_btf_type_id:
12603 case BPF::BI__builtin_preserve_type_info: {
12604 if (!getDebugInfo()) {
12605 CGM.Error(E->getExprLoc(), "using builtin function without -g");
12606 return nullptr;
12607 }
12608
12609 const Expr *Arg0 = E->getArg(0);
12610 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
12611 Arg0->getType(), Arg0->getExprLoc());
12612
12613 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12614 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
12615 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
12616
12617 llvm::Function *FnDecl;
12618 if (BuiltinID == BPF::BI__builtin_btf_type_id)
12619 FnDecl = llvm::Intrinsic::getDeclaration(
12620 &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
12621 else
12622 FnDecl = llvm::Intrinsic::getDeclaration(
12623 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
12624 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
12625 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
12626 return Fn;
12627 }
12628 case BPF::BI__builtin_preserve_enum_value: {
12629 if (!getDebugInfo()) {
12630 CGM.Error(E->getExprLoc(), "using builtin function without -g");
12631 return nullptr;
12632 }
12633
12634 const Expr *Arg0 = E->getArg(0);
12635 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
12636 Arg0->getType(), Arg0->getExprLoc());
12637
12638 // Find enumerator
12639 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
12640 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
12641 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
12642 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
12643
12644 auto &InitVal = Enumerator->getInitVal();
12645 std::string InitValStr;
12646 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
12647 InitValStr = std::to_string(InitVal.getSExtValue());
12648 else
12649 InitValStr = std::to_string(InitVal.getZExtValue());
12650 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
12651 Value *EnumStrVal = Builder.CreateGlobalStringPtr(EnumStr);
12652
12653 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12654 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
12655 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
12656
12657 llvm::Function *IntrinsicFn = llvm::Intrinsic::getDeclaration(
12658 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
12659 CallInst *Fn =
12660 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
12661 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
12662 return Fn;
12663 }
12664 }
12665}
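// ---- Editor's illustrative sketch (not part of CGBuiltin.cpp) ----
// Source-level usage that reaches EmitBPFBuiltinExpr above. Builds only for
// the BPF target with debug info, e.g. `clang -g -O2 -target bpf -c relo.c`;
// without -g the code above emits the "without -g" diagnostic instead. The
// info-kind value 0 (byte offset) is an assumption chosen for illustration.
struct pkt { unsigned len : 12; };
unsigned byte_off(struct pkt *p) {
  // Lowers to llvm.bpf.preserve.field.info on the bitfield's address.
  return __builtin_preserve_field_info(p->len, 0);
}
// -------------------------------------------------------------------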
12666
12667llvm::Value *CodeGenFunction::
12668BuildVector(ArrayRef<llvm::Value*> Ops) {
12669 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
12670 "Not a power-of-two sized vector!");
12671 bool AllConstants = true;
12672 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
12673 AllConstants &= isa<Constant>(Ops[i]);
12674
12675 // If this is a constant vector, create a ConstantVector.
12676 if (AllConstants) {
12677 SmallVector<llvm::Constant*, 16> CstOps;
12678 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
12679 CstOps.push_back(cast<Constant>(Ops[i]));
12680 return llvm::ConstantVector::get(CstOps);
12681 }
12682
12683 // Otherwise, insertelement the values to build the vector.
12684 Value *Result = llvm::PoisonValue::get(
12685 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
12686
12687 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
12688 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
12689
12690 return Result;
12691}
12692
12693// Convert the mask from an integer type to a vector of i1.
12694static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
12695 unsigned NumElts) {
12696
12697 auto *MaskTy = llvm::FixedVectorType::get(
12698 CGF.Builder.getInt1Ty(),
12699 cast<IntegerType>(Mask->getType())->getBitWidth());
12700 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
12701
12702 // If we have fewer than 8 elements, then the starting mask was an i8 and
12703 // we need to extract down to the right number of elements.
12704 if (NumElts < 8) {
12705 int Indices[4];
12706 for (unsigned i = 0; i != NumElts; ++i)
12707 Indices[i] = i;
12708 MaskVec = CGF.Builder.CreateShuffleVector(
12709 MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
12710 }
12711 return MaskVec;
12712}
12713
12714static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
12715 Align Alignment) {
12716 // Cast the pointer to the right type.
12717 Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
12718 llvm::PointerType::getUnqual(Ops[1]->getType()));
12719
12720 Value *MaskVec = getMaskVecValue(
12721 CGF, Ops[2],
12722 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
12723
12724 return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
12725}
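// ---- Editor's illustrative sketch (not part of CGBuiltin.cpp) ----
// A user-level masked store that funnels through getMaskVecValue and
// EmitX86MaskedStore above. With only 2 lanes, the i8 mask is bitcast to
// <8 x i1> and the low 2 bits are extracted by the shufflevector in
// getMaskVecValue. Assumes x86-64 with -mavx512f -mavx512vl.
#include <immintrin.h>

void store_low_lane(double *p, __m128d v) {
  _mm_mask_storeu_pd(p, /*k=*/0x1, v); // llvm.masked.store, lane 0 only
}
// -------------------------------------------------------------------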
12726
12727static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
12728 Align Alignment) {
12729 // Cast the pointer to the right type.
12730 llvm::Type *Ty = Ops[1]->getType();
12731 Value *Ptr =
12732 CGF.Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
12733
12734 Value *MaskVec = getMaskVecValue(
12735 CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
12736
12737 return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
12738}
12739
12740static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
12741 ArrayRef<Value *> Ops) {
12742 auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
12743 llvm::Type *PtrTy = ResultTy->getElementType();
12744
12745 // Cast the pointer to element type.
12746 Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
12747 llvm::PointerType::getUnqual(PtrTy));
12748
12749 Value *MaskVec = getMaskVecValue(
12750 CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
12751
12752 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
12753 ResultTy);
12754 return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
12755}
12756
12757static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
12758 ArrayRef<Value *> Ops,
12759 bool IsCompress) {
12760 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
12761
12762 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
12763
12764 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
12765 : Intrinsic::x86_avx512_mask_expand;
12766 llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
12767 return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
12768}
12769
12770static Value *EmitX86CompressStore(CodeGenFunction &CGF,
12771 ArrayRef<Value *> Ops) {
12772 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
12773 llvm::Type *PtrTy = ResultTy->getElementType();
12774
12775 // Cast the pointer to element type.
12776 Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
12777 llvm::PointerType::getUnqual(PtrTy));
12778
12779 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
12780
12781 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
12782 ResultTy);
12783 return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
12784}
12785
12786static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
12787 ArrayRef<Value *> Ops,
12788 bool InvertLHS = false) {
12789 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
12790 Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
12791 Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
12792
12793 if (InvertLHS)
12794 LHS = CGF.Builder.CreateNot(LHS);
12795
12796 return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
12797 Ops[0]->getType());
12798}
12799
12800static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
12801 Value *Amt, bool IsRight) {
12802 llvm::Type *Ty = Op0->getType();
12803
12804 // The amount may be a scalar immediate, in which case we create a splat
12805 // vector. Funnel shift amounts are treated as modulo the bit width, and
12806 // the types are all power-of-2, so we only care about the lowest log2 bits anyway.
12807 if (Amt->getType() != Ty) {
12808 unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
12809 Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
12810 Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
12811 }
12812
12813 unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
12814 Function *F = CGF.CGM.getIntrinsic(IID, Ty);
12815 return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
12816}
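// ---- Editor's illustrative sketch (not part of CGBuiltin.cpp) ----
// Rotates are funnel shifts with both data operands equal, so the AVX-512
// rotate intrinsics reach EmitX86FunnelShift with Op0 == Op1. Assumes
// x86-64 with -mavx512f.
#include <immintrin.h>

__m512i rol7(__m512i v) {
  return _mm512_rol_epi32(v, 7); // llvm.fshl.v16i32(v, v, splat(7))
}
// -------------------------------------------------------------------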
12817
12818static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
12819 bool IsSigned) {
12820 Value *Op0 = Ops[0];
12821 Value *Op1 = Ops[1];
12822 llvm::Type *Ty = Op0->getType();
12823 uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
12824
12825 CmpInst::Predicate Pred;
12826 switch (Imm) {
12827 case 0x0:
12828 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
12829 break;
12830 case 0x1:
12831 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
12832 break;
12833 case 0x2:
12834 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
12835 break;
12836 case 0x3:
12837 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
12838 break;
12839 case 0x4:
12840 Pred = ICmpInst::ICMP_EQ;
12841 break;
12842 case 0x5:
12843 Pred = ICmpInst::ICMP_NE;
12844 break;
12845 case 0x6:
12846 return llvm::Constant::getNullValue(Ty); // FALSE
12847 case 0x7:
12848 return llvm::Constant::getAllOnesValue(Ty); // TRUE
12849 default:
12850 llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
12851 }
12852
12853 Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
12854 Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
12855 return Res;
12856}
12857
12858static Value *EmitX86Select(CodeGenFunction &CGF,
12859 Value *Mask, Value *Op0, Value *Op1) {
12860
12861 // If the mask is all ones just return first argument.
12862 if (const auto *C = dyn_cast<Constant>(Mask))
12863 if (C->isAllOnesValue())
12864 return Op0;
12865
12866 Mask = getMaskVecValue(
12867 CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
12868
12869 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
12870}
12871
12872static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
12873 Value *Mask, Value *Op0, Value *Op1) {
12874 // If the mask is all ones just return first argument.
12875 if (const auto *C = dyn_cast<Constant>(Mask))
12876 if (C->isAllOnesValue())
12877 return Op0;
12878
12879 auto *MaskTy = llvm::FixedVectorType::get(
12880 CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
12881 Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
12882 Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
12883 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
12884}
12885
12886static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
12887 unsigned NumElts, Value *MaskIn) {
12888 if (MaskIn) {
12889 const auto *C = dyn_cast<Constant>(MaskIn);
12890 if (!C || !C->isAllOnesValue())
12891 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
12892 }
12893
12894 if (NumElts < 8) {
12895 int Indices[8];
12896 for (unsigned i = 0; i != NumElts; ++i)
12897 Indices[i] = i;
12898 for (unsigned i = NumElts; i != 8; ++i)
12899 Indices[i] = i % NumElts + NumElts;
12900 Cmp = CGF.Builder.CreateShuffleVector(
12901 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
12902 }
12903
12904 return CGF.Builder.CreateBitCast(Cmp,
12905 IntegerType::get(CGF.getLLVMContext(),
12906 std::max(NumElts, 8U)));
12907}
12908
12909static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
12910 bool Signed, ArrayRef<Value *> Ops) {
12911 assert((Ops.size() == 2 || Ops.size() == 4) &&
12912 "Unexpected number of arguments");
12913 unsigned NumElts =
12914 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
12915 Value *Cmp;
12916
12917 if (CC == 3) {
12918 Cmp = Constant::getNullValue(
12919 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
12920 } else if (CC == 7) {
12921 Cmp = Constant::getAllOnesValue(
12922 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
12923 } else {
12924 ICmpInst::Predicate Pred;
12925 switch (CC) {
12926 default: llvm_unreachable("Unknown condition code");
12927 case 0: Pred = ICmpInst::ICMP_EQ; break;
12928 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
12929 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
12930 case 4: Pred = ICmpInst::ICMP_NE; break;
12931 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
12932 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
12933 }
12934 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
12935 }
12936
12937 Value *MaskIn = nullptr;
12938 if (Ops.size() == 4)
12939 MaskIn = Ops[3];
12940
12941 return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
12942}
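// ---- Editor's illustrative sketch (not part of CGBuiltin.cpp) ----
// An AVX-512 compare-to-mask intrinsic lowered via EmitX86MaskedCompare:
// CC == 1 with Signed selects ICMP_SLT, and the <16 x i1> result is
// bitcast to the i16 mask type by EmitX86MaskedCompareResult. Assumes
// x86-64 with -mavx512f.
#include <immintrin.h>

__mmask16 lanes_lt(__m512i a, __m512i b) {
  return _mm512_cmplt_epi32_mask(a, b);
}
// -------------------------------------------------------------------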
12943
12944static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
12945 Value *Zero = Constant::getNullValue(In->getType());
12946 return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
12947}
12948
12949static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
12950 ArrayRef<Value *> Ops, bool IsSigned) {
12951 unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
12952 llvm::Type *Ty = Ops[1]->getType();
12953
12954 Value *Res;
12955 if (Rnd != 4) {
12956 Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
12957 : Intrinsic::x86_avx512_uitofp_round;
12958 Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
12959 Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
12960 } else {
12961 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
12962 Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
12963 : CGF.Builder.CreateUIToFP(Ops[0], Ty);
12964 }
12965
12966 return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
12967}
12968
12969// Lowers X86 FMA intrinsics to IR.
12970static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
12971 ArrayRef<Value *> Ops, unsigned BuiltinID,
12972 bool IsAddSub) {
12973
12974 bool Subtract = false;
12975 Intrinsic::ID IID = Intrinsic::not_intrinsic;
12976 switch (BuiltinID) {
12977 default: break;
12978 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
12979 Subtract = true;
12980 [[fallthrough]];
12981 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
12982 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
12983 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
12984 IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
12985 break;
12986 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
12987 Subtract = true;
12988 [[fallthrough]];
12989 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
12990 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
12991 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
12992 IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
12993 break;
12994 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
12995 Subtract = true;
12996 [[fallthrough]];
12997 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
12998 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
12999 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
13000 IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
13001 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
13002 Subtract = true;
13003 [[fallthrough]];
13004 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
13005 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
13006 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
13007 IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
13008 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
13009 Subtract = true;
13010 [[fallthrough]];
13011 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
13012 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
13013 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
13014 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
13015 break;
13016 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
13017 Subtract = true;
13018 [[fallthrough]];
13019 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
13020 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
13021 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
13022 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
13023 break;
13024 }
13025
13026 Value *A = Ops[0];
13027 Value *B = Ops[1];
13028 Value *C = Ops[2];
13029
13030 if (Subtract)
13031 C = CGF.Builder.CreateFNeg(C);
13032
13033 Value *Res;
13034
13035 // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding).
13036 if (IID != Intrinsic::not_intrinsic &&
13037 (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
13038 IsAddSub)) {
13039 Function *Intr = CGF.CGM.getIntrinsic(IID);
13040 Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
13041 } else {
13042 llvm::Type *Ty = A->getType();
13043 Function *FMA;
13044 if (CGF.Builder.getIsFPConstrained()) {
13045 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13046 FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
13047 Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
13048 } else {
13049 FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
13050 Res = CGF.Builder.CreateCall(FMA, {A, B, C});
13051 }
13052 }
13053
13054 // Handle any required masking.
13055 Value *MaskFalseVal = nullptr;
13056 switch (BuiltinID) {
13057 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
13058 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
13059 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
13060 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
13061 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
13062 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
13063 MaskFalseVal = Ops[0];
13064 break;
13065 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
13066 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
13067 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
13068 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
13069 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
13070 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
13071 MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
13072 break;
13073 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
13074 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
13075 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
13076 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
13077 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
13078 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
13079 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
13080 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
13081 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
13082 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
13083 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
13084 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
13085 MaskFalseVal = Ops[2];
13086 break;
13087 }
13088
13089 if (MaskFalseVal)
13090 return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
13091
13092 return Res;
13093}
13094
13095static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
13096 MutableArrayRef<Value *> Ops, Value *Upper,
13097 bool ZeroMask = false, unsigned PTIdx = 0,
13098 bool NegAcc = false) {
13099 unsigned Rnd = 4;
13100 if (Ops.size() > 4)
13101 Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
13102
13103 if (NegAcc)
13104 Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
13105
13106 Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
13107 Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
13108 Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
13109 Value *Res;
13110 if (Rnd != 4) {
13111 Intrinsic::ID IID;
13112
13113 switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
13114 case 16:
13115 IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
13116 break;
13117 case 32:
13118 IID = Intrinsic::x86_avx512_vfmadd_f32;
13119 break;
13120 case 64:
13121 IID = Intrinsic::x86_avx512_vfmadd_f64;
13122 break;
13123 default:
13124 llvm_unreachable("Unexpected size");
13125 }
13126 Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
13127 {Ops[0], Ops[1], Ops[2], Ops[4]});
13128 } else if (CGF.Builder.getIsFPConstrained()) {
13129 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13130 Function *FMA = CGF.CGM.getIntrinsic(
13131 Intrinsic::experimental_constrained_fma, Ops[0]->getType());
13132 Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
13133 } else {
13134 Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
13135 Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
13136 }
13137 // If we have more than 3 arguments, we need to do masking.
13138 if (Ops.size() > 3) {
13139 Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
13140 : Ops[PTIdx];
13141
13142 // If we negated the accumulator and it is the PassThru value, we need to
13143 // bypass the negate. Conveniently, Upper should be the same thing in this
13144 // case.
13145 if (NegAcc && PTIdx == 2)
13146 PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
13147
13148 Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
13149 }
13150 return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
13151}
13152
13153static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
13154 ArrayRef<Value *> Ops) {
13155 llvm::Type *Ty = Ops[0]->getType();
13156 // Arguments have a vXi32 type so cast to vXi64.
13157 Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
13158 Ty->getPrimitiveSizeInBits() / 64);
13159 Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
13160 Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
13161
13162 if (IsSigned) {
13163 // Shift left then arithmetic shift right.
13164 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
13165 LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
13166 LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
13167 RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
13168 RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
13169 } else {
13170 // Clear the upper bits.
13171 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
13172 LHS = CGF.Builder.CreateAnd(LHS, Mask);
13173 RHS = CGF.Builder.CreateAnd(RHS, Mask);
13174 }
13175
13176 return CGF.Builder.CreateMul(LHS, RHS);
13177}
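// ---- Editor's sketch (not part of CGBuiltin.cpp): the shl/ashr pair in
// EmitX86Muldq is a lane-wise sign extension of the low 32 bits of each
// 64-bit lane. Scalar picture of one signed (pmuldq) lane:
#include <cstdint>

int64_t muldq_lane(uint64_t a, uint64_t b) {
  int64_t la = (int64_t)(a << 32) >> 32; // sign-extend low 32 bits
  int64_t lb = (int64_t)(b << 32) >> 32;
  return la * lb;                        // what pmuldq computes per lane
}
// ---------------------------------------------------------------------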
13178
13179// Emit a masked pternlog intrinsic. This only exists because the header has to
13180// use a macro and we aren't able to pass the input argument to a pternlog
13181// builtin and a select builtin without evaluating it twice.
13182static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
13183 ArrayRef<Value *> Ops) {
13184 llvm::Type *Ty = Ops[0]->getType();
13185
13186 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
13187 unsigned EltWidth = Ty->getScalarSizeInBits();
13188 Intrinsic::ID IID;
13189 if (VecWidth == 128 && EltWidth == 32)
13190 IID = Intrinsic::x86_avx512_pternlog_d_128;
13191 else if (VecWidth == 256 && EltWidth == 32)
13192 IID = Intrinsic::x86_avx512_pternlog_d_256;
13193 else if (VecWidth == 512 && EltWidth == 32)
13194 IID = Intrinsic::x86_avx512_pternlog_d_512;
13195 else if (VecWidth == 128 && EltWidth == 64)
13196 IID = Intrinsic::x86_avx512_pternlog_q_128;
13197 else if (VecWidth == 256 && EltWidth == 64)
13198 IID = Intrinsic::x86_avx512_pternlog_q_256;
13199 else if (VecWidth == 512 && EltWidth == 64)
13200 IID = Intrinsic::x86_avx512_pternlog_q_512;
13201 else
13202 llvm_unreachable("Unexpected intrinsic");
13203
13204 Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
13205 Ops.drop_back());
13206 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
13207 return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
13208}
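// ---- Editor's illustrative sketch (not part of CGBuiltin.cpp) ----
// A ternary-logic call that reaches EmitX86Ternlog. The immediate 0xCA
// encodes the truth table for bitwise (m ? x : y). Assumes x86-64 with
// -mavx512f.
#include <immintrin.h>

__m512i bit_select(__m512i m, __m512i x, __m512i y) {
  return _mm512_ternarylogic_epi32(m, x, y, 0xCA);
}
// -------------------------------------------------------------------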
13209
13210static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
13211 llvm::Type *DstTy) {
13212 unsigned NumberOfElements =
13213 cast<llvm::FixedVectorType>(DstTy)->getNumElements();
13214 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
13215 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
13216}
13217
13218Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
13219 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
13220 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
13221 return EmitX86CpuIs(CPUStr);
13222}
13223
13224// Convert F16 halves to floats.
13225static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
13226 ArrayRef<Value *> Ops,
13227 llvm::Type *DstTy) {
13228 assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
13229 "Unknown cvtph2ps intrinsic");
13230
13231 // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
13232 if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
13233 Function *F =
13234 CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
13235 return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
13236 }
13237
13238 unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
13239 Value *Src = Ops[0];
13240
13241 // Extract the subvector.
13242 if (NumDstElts !=
13243 cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
13244 assert(NumDstElts == 4 && "Unexpected vector size");
13245 Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
13246 }
13247
13248 // Bitcast from vXi16 to vXf16.
13249 auto *HalfTy = llvm::FixedVectorType::get(
13250 llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
13251 Src = CGF.Builder.CreateBitCast(Src, HalfTy);
13252
13253 // Perform the fp-extension.
13254 Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
13255
13256 if (Ops.size() >= 3)
13257 Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
13258 return Res;
13259}
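// ---- Editor's illustrative sketch (not part of CGBuiltin.cpp) ----
// With default rounding, EmitX86CvtF16ToFloatExpr turns cvtph2ps into a
// plain bitcast-to-half plus fpext, as shown above. Assumes x86-64 with
// -mf16c.
#include <immintrin.h>

__m128 half4_to_float4(__m128i h) {
  return _mm_cvtph_ps(h); // low 4 i16 lanes -> <4 x half> -> fpext to float
}
// -------------------------------------------------------------------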
13260
13261Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
13262
13263 llvm::Type *Int32Ty = Builder.getInt32Ty();
13264
13265 // Matching the struct layout from the compiler-rt/libgcc structure that is
13266 // filled in:
13267 // unsigned int __cpu_vendor;
13268 // unsigned int __cpu_type;
13269 // unsigned int __cpu_subtype;
13270 // unsigned int __cpu_features[1];
13271 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
13272 llvm::ArrayType::get(Int32Ty, 1));
13273
13274 // Grab the global __cpu_model.
13275 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
13276 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
13277
13278 // Calculate the index needed to access the correct field based on the
13279 // range. Also adjust the expected value.
13280 unsigned Index;
13281 unsigned Value;
13282 std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
13283#define X86_VENDOR(ENUM, STRING) \
13284 .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
13285#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \
13286 .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
13287#define X86_CPU_TYPE(ENUM, STR) \
13288 .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
13289#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \
13290 .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
13291#define X86_CPU_SUBTYPE(ENUM, STR) \
13292 .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
13293#include "llvm/TargetParser/X86TargetParser.def"
13294 .Default({0, 0});
13295 assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
13296
13297 // Grab the appropriate field from __cpu_model.
13298 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
13299 ConstantInt::get(Int32Ty, Index)};
13300 llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs);
13301 CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
13302 CharUnits::fromQuantity(4));
13303
13304 // Check the value of the field against the requested value.
13305 return Builder.CreateICmpEQ(CpuValue,
13306 llvm::ConstantInt::get(Int32Ty, Value));
13307}
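// ---- Editor's illustrative sketch (not part of CGBuiltin.cpp) ----
// Source-level usage of the lowering above: the builtin becomes a load of
// one __cpu_model field plus an integer compare. No CPUID runs at the call
// site; the compiler-rt/libgcc startup code fills in __cpu_model.
int is_amd(void) {
  return __builtin_cpu_is("amd");
}
// -------------------------------------------------------------------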
13308
13309Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
13310 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
13311 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
13312 return EmitX86CpuSupports(FeatureStr);
13313}
13314
13315Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
13316 return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
13317}
13318
13319llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) {
13320 uint32_t Features1 = Lo_32(FeaturesMask);
13321 uint32_t Features2 = Hi_32(FeaturesMask);
13322
13323 Value *Result = Builder.getTrue();
13324
13325 if (Features1 != 0) {
13326 // Matching the struct layout from the compiler-rt/libgcc structure that is
13327 // filled in:
13328 // unsigned int __cpu_vendor;
13329 // unsigned int __cpu_type;
13330 // unsigned int __cpu_subtype;
13331 // unsigned int __cpu_features[1];
13332 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
13333 llvm::ArrayType::get(Int32Ty, 1));
13334
13335 // Grab the global __cpu_model.
13336 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
13337 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
13338
13339 // Grab the first (0th) element from the field __cpu_features off of the
13340 // global in the struct STy.
13341 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
13342 Builder.getInt32(0)};
13343 Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
13344 Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
13345 CharUnits::fromQuantity(4));
13346
13347 // Check the value of the bit corresponding to the feature requested.
13348 Value *Mask = Builder.getInt32(Features1);
13349 Value *Bitset = Builder.CreateAnd(Features, Mask);
13350 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
13351 Result = Builder.CreateAnd(Result, Cmp);
13352 }
13353
13354 if (Features2 != 0) {
13355 llvm::Constant *CpuFeatures2 = CGM.CreateRuntimeVariable(Int32Ty,
13356 "__cpu_features2");
13357 cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
13358
13359 Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures2,
13360 CharUnits::fromQuantity(4));
13361
13362 // Check the value of the bit corresponding to the feature requested.
13363 Value *Mask = Builder.getInt32(Features2);
13364 Value *Bitset = Builder.CreateAnd(Features, Mask);
13365 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
13366 Result = Builder.CreateAnd(Result, Cmp);
13367 }
13368
13369 return Result;
13370}
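// ---- Editor's illustrative sketch (not part of CGBuiltin.cpp) ----
// Each __builtin_cpu_supports call lowers, per the helper above, to a
// load/and/icmp against the appropriate features word (__cpu_model's
// features array or __cpu_features2).
int have_avx2_and_fma(void) {
  return __builtin_cpu_supports("avx2") && __builtin_cpu_supports("fma");
}
// -------------------------------------------------------------------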
13371
13372Value *CodeGenFunction::EmitAArch64CpuInit() {
13373 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
13374 llvm::FunctionCallee Func =
13375 CGM.CreateRuntimeFunction(FTy, "init_cpu_features_resolver");
13376 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
13377 cast<llvm::GlobalValue>(Func.getCallee())
13378 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
13379 return Builder.CreateCall(Func);
13380}
13381
13382Value *CodeGenFunction::EmitX86CpuInit() {
13383 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
13384 /*Variadic*/ false);
13385 llvm::FunctionCallee Func =
13386 CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
13387 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
13388 cast<llvm::GlobalValue>(Func.getCallee())
13389 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
13390 return Builder.CreateCall(Func);
13391}
13392
13393llvm::Value *
13394CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
13395 uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
13396 Value *Result = Builder.getTrue();
13397 if (FeaturesMask != 0) {
13398 // Get features from structure in runtime library
13399 // struct {
13400 // unsigned long long features;
13401 // } __aarch64_cpu_features;
13402 llvm::Type *STy = llvm::StructType::get(Int64Ty);
13403 llvm::Constant *AArch64CPUFeatures =
13404 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
13405 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
13406 llvm::Value *CpuFeatures = Builder.CreateGEP(
13407 STy, AArch64CPUFeatures,
13408 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
13409 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
13410 CharUnits::fromQuantity(8));
13411 Value *Mask = Builder.getInt64(FeaturesMask);
13412 Value *Bitset = Builder.CreateAnd(Features, Mask);
13413 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
13414 Result = Builder.CreateAnd(Result, Cmp);
13415 }
13416 return Result;
13417}
13418
13419Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
13420 const CallExpr *E) {
13421 if (BuiltinID == X86::BI__builtin_cpu_is)
13422 return EmitX86CpuIs(E);
13423 if (BuiltinID == X86::BI__builtin_cpu_supports)
13424 return EmitX86CpuSupports(E);
13425 if (BuiltinID == X86::BI__builtin_cpu_init)
13426 return EmitX86CpuInit();
13427
13428 // Handle MSVC intrinsics before argument evaluation to prevent double
13429 // evaluation.
13430 if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
13431 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
13432
13433 SmallVector<Value*, 4> Ops;
13434 bool IsMaskFCmp = false;
13435 bool IsConjFMA = false;
13436
13437 // Find out if any arguments are required to be integer constant expressions.
13438 unsigned ICEArguments = 0;
13439 ASTContext::GetBuiltinTypeError Error;
13440 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
13441 assert(Error == ASTContext::GE_None && "Should not codegen an error");
13442
13443 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
13444 // If this is a normal argument, just emit it as a scalar.
13445 if ((ICEArguments & (1 << i)) == 0) {
13446 Ops.push_back(EmitScalarExpr(E->getArg(i)));
13447 continue;
13448 }
13449
13450 // If this is required to be a constant, constant fold it so that we know
13451 // that the generated intrinsic gets a ConstantInt.
13452 Ops.push_back(llvm::ConstantInt::get(
13453 getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext())));
13454 }
13455
13456 // These exist so that the builtin that takes an immediate can be bounds
13457 // checked by clang to avoid passing bad immediates to the backend. Since
13458 // AVX has a larger immediate than SSE we would need separate builtins to
13459 // do the different bounds checking. Rather than create a clang-specific
13460 // SSE-only builtin, this implements eight separate builtins to match the
13461 // gcc implementation.
13462 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
13463 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
13464 llvm::Function *F = CGM.getIntrinsic(ID);
13465 return Builder.CreateCall(F, Ops);
13466 };
13467
13468 // For the vector forms of FP comparisons, translate the builtins directly to
13469 // IR.
13470 // TODO: The builtins could be removed if the SSE header files used vector
13471 // extension comparisons directly (vector ordered/unordered may need
13472 // additional support via __builtin_isnan()).
13473 auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
13474 bool IsSignaling) {
13475 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
13476 Value *Cmp;
13477 if (IsSignaling)
13478 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
13479 else
13480 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
13481 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
13482 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
13483 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
13484 return Builder.CreateBitCast(Sext, FPVecTy);
13485 };
13486
13487 switch (BuiltinID) {
13488 default: return nullptr;
13489 case X86::BI_mm_prefetch: {
13490 Value *Address = Ops[0];
13491 ConstantInt *C = cast<ConstantInt>(Ops[1]);
13492 Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
13493 Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
13494 Value *Data = ConstantInt::get(Int32Ty, 1);
13495 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
13496 return Builder.CreateCall(F, {Address, RW, Locality, Data});
13497 }
13498 case X86::BI_mm_clflush: {
13499 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
13500 Ops[0]);
13501 }
13502 case X86::BI_mm_lfence: {
13503 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
13504 }
13505 case X86::BI_mm_mfence: {
13506 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
13507 }
13508 case X86::BI_mm_sfence: {
13509 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
13510 }
13511 case X86::BI_mm_pause: {
13512 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
13513 }
13514 case X86::BI__rdtsc: {
13515 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
13516 }
13517 case X86::BI__builtin_ia32_rdtscp: {
13518 Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
13519 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
13520 Ops[0]);
13521 return Builder.CreateExtractValue(Call, 0);
13522 }
13523 case X86::BI__builtin_ia32_lzcnt_u16:
13524 case X86::BI__builtin_ia32_lzcnt_u32:
13525 case X86::BI__builtin_ia32_lzcnt_u64: {
13526 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
13527 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
13528 }
13529 case X86::BI__builtin_ia32_tzcnt_u16:
13530 case X86::BI__builtin_ia32_tzcnt_u32:
13531 case X86::BI__builtin_ia32_tzcnt_u64: {
13532 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
13533 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
13534 }
13535 case X86::BI__builtin_ia32_undef128:
13536 case X86::BI__builtin_ia32_undef256:
13537 case X86::BI__builtin_ia32_undef512:
13538 // The x86 definition of "undef" is not the same as the LLVM definition
13539 // (PR32176). We leave optimizing away an unnecessary zero constant to the
13540 // IR optimizer and backend.
13541 // TODO: If we had a "freeze" IR instruction to generate a fixed undef
13542 // value, we should use that here instead of a zero.
13543 return llvm::Constant::getNullValue(ConvertType(E->getType()));
13544 case X86::BI__builtin_ia32_vec_init_v8qi:
13545 case X86::BI__builtin_ia32_vec_init_v4hi:
13546 case X86::BI__builtin_ia32_vec_init_v2si:
13547 return Builder.CreateBitCast(BuildVector(Ops),
13548 llvm::Type::getX86_MMXTy(getLLVMContext()));
13549 case X86::BI__builtin_ia32_vec_ext_v2si:
13550 case X86::BI__builtin_ia32_vec_ext_v16qi:
13551 case X86::BI__builtin_ia32_vec_ext_v8hi:
13552 case X86::BI__builtin_ia32_vec_ext_v4si:
13553 case X86::BI__builtin_ia32_vec_ext_v4sf:
13554 case X86::BI__builtin_ia32_vec_ext_v2di:
13555 case X86::BI__builtin_ia32_vec_ext_v32qi:
13556 case X86::BI__builtin_ia32_vec_ext_v16hi:
13557 case X86::BI__builtin_ia32_vec_ext_v8si:
13558 case X86::BI__builtin_ia32_vec_ext_v4di: {
13559 unsigned NumElts =
13560 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
13561 uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
13562 Index &= NumElts - 1;
13563 // These builtins exist so we can ensure the index is an ICE and in range.
13564 // Otherwise we could just do this in the header file.
13565 return Builder.CreateExtractElement(Ops[0], Index);
13566 }
13567 case X86::BI__builtin_ia32_vec_set_v16qi:
13568 case X86::BI__builtin_ia32_vec_set_v8hi:
13569 case X86::BI__builtin_ia32_vec_set_v4si:
13570 case X86::BI__builtin_ia32_vec_set_v2di:
13571 case X86::BI__builtin_ia32_vec_set_v32qi:
13572 case X86::BI__builtin_ia32_vec_set_v16hi:
13573 case X86::BI__builtin_ia32_vec_set_v8si:
13574 case X86::BI__builtin_ia32_vec_set_v4di: {
13575 unsigned NumElts =
13576 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
13577 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
13578 Index &= NumElts - 1;
13579 // These builtins exist so we can ensure the index is an ICE and in range.
13580 // Otherwise we could just do this in the header file.
13581 return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
13582 }
13583 case X86::BI_mm_setcsr:
13584 case X86::BI__builtin_ia32_ldmxcsr: {
13585 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
13586 Builder.CreateStore(Ops[0], Tmp);
13587 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
13588 Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
13589 }
13590 case X86::BI_mm_getcsr:
13591 case X86::BI__builtin_ia32_stmxcsr: {
13592 Address Tmp = CreateMemTemp(E->getType());
13593 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
13594 Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
13595 return Builder.CreateLoad(Tmp, "stmxcsr");
13596 }
13597 case X86::BI__builtin_ia32_xsave:
13598 case X86::BI__builtin_ia32_xsave64:
13599 case X86::BI__builtin_ia32_xrstor:
13600 case X86::BI__builtin_ia32_xrstor64:
13601 case X86::BI__builtin_ia32_xsaveopt:
13602 case X86::BI__builtin_ia32_xsaveopt64:
13603 case X86::BI__builtin_ia32_xrstors:
13604 case X86::BI__builtin_ia32_xrstors64:
13605 case X86::BI__builtin_ia32_xsavec:
13606 case X86::BI__builtin_ia32_xsavec64:
13607 case X86::BI__builtin_ia32_xsaves:
13608 case X86::BI__builtin_ia32_xsaves64:
13609 case X86::BI__builtin_ia32_xsetbv:
13610 case X86::BI_xsetbv: {
13611 Intrinsic::ID ID;
13612#define INTRINSIC_X86_XSAVE_ID(NAME) \
13613 case X86::BI__builtin_ia32_##NAME: \
13614 ID = Intrinsic::x86_##NAME; \
13615 break
13616 switch (BuiltinID) {
13617 default: llvm_unreachable("Unsupported intrinsic!");
13618 INTRINSIC_X86_XSAVE_ID(xsave);
13619 INTRINSIC_X86_XSAVE_ID(xsave64);
13620 INTRINSIC_X86_XSAVE_ID(xrstor);
13621 INTRINSIC_X86_XSAVE_ID(xrstor64);
13622 INTRINSIC_X86_XSAVE_ID(xsaveopt);
13623 INTRINSIC_X86_XSAVE_ID(xsaveopt64);
13624 INTRINSIC_X86_XSAVE_ID(xrstors);
13625 INTRINSIC_X86_XSAVE_ID(xrstors64);
13626 INTRINSIC_X86_XSAVE_ID(xsavec);
13627 INTRINSIC_X86_XSAVE_ID(xsavec64);
13628 INTRINSIC_X86_XSAVE_ID(xsaves);
13629 INTRINSIC_X86_XSAVE_ID(xsaves64);
13630 INTRINSIC_X86_XSAVE_ID(xsetbv);
13631 case X86::BI_xsetbv:
13632 ID = Intrinsic::x86_xsetbv;
13633 break;
13634 }
13635#undef INTRINSIC_X86_XSAVE_ID
13636 Value *Mhi = Builder.CreateTrunc(
13637 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
13638 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
13639 Ops[1] = Mhi;
13640 Ops.push_back(Mlo);
13641 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
13642 }
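// ---- Editor's note (illustrative, not part of the original source) ----
// The hardware xsave/xrstor family takes the 64-bit component mask in
// EDX:EAX, which is why Ops[1] is split into Mhi/Mlo above. User-level
// example (assumes x86-64 with -mxsave and <immintrin.h>):
//   void save_x87_sse(void *buf) {
//     _xsave(buf, 0x3); // bit 0 = x87 state, bit 1 = SSE state
//   }
// -----------------------------------------------------------------------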
13643 case X86::BI__builtin_ia32_xgetbv:
13644 case X86::BI_xgetbv:
13645 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
13646 case X86::BI__builtin_ia32_storedqudi128_mask:
13647 case X86::BI__builtin_ia32_storedqusi128_mask:
13648 case X86::BI__builtin_ia32_storedquhi128_mask:
13649 case X86::BI__builtin_ia32_storedquqi128_mask:
13650 case X86::BI__builtin_ia32_storeupd128_mask:
13651 case X86::BI__builtin_ia32_storeups128_mask:
13652 case X86::BI__builtin_ia32_storedqudi256_mask:
13653 case X86::BI__builtin_ia32_storedqusi256_mask:
13654 case X86::BI__builtin_ia32_storedquhi256_mask:
13655 case X86::BI__builtin_ia32_storedquqi256_mask:
13656 case X86::BI__builtin_ia32_storeupd256_mask:
13657 case X86::BI__builtin_ia32_storeups256_mask:
13658 case X86::BI__builtin_ia32_storedqudi512_mask:
13659 case X86::BI__builtin_ia32_storedqusi512_mask:
13660 case X86::BI__builtin_ia32_storedquhi512_mask:
13661 case X86::BI__builtin_ia32_storedquqi512_mask:
13662 case X86::BI__builtin_ia32_storeupd512_mask:
13663 case X86::BI__builtin_ia32_storeups512_mask:
13664 return EmitX86MaskedStore(*this, Ops, Align(1));
13665
13666 case X86::BI__builtin_ia32_storesh128_mask:
13667 case X86::BI__builtin_ia32_storess128_mask:
13668 case X86::BI__builtin_ia32_storesd128_mask:
13669 return EmitX86MaskedStore(*this, Ops, Align(1));
13670
13671 case X86::BI__builtin_ia32_vpopcntb_128:
13672 case X86::BI__builtin_ia32_vpopcntd_128:
13673 case X86::BI__builtin_ia32_vpopcntq_128:
13674 case X86::BI__builtin_ia32_vpopcntw_128:
13675 case X86::BI__builtin_ia32_vpopcntb_256:
13676 case X86::BI__builtin_ia32_vpopcntd_256:
13677 case X86::BI__builtin_ia32_vpopcntq_256:
13678 case X86::BI__builtin_ia32_vpopcntw_256:
13679 case X86::BI__builtin_ia32_vpopcntb_512:
13680 case X86::BI__builtin_ia32_vpopcntd_512:
13681 case X86::BI__builtin_ia32_vpopcntq_512:
13682 case X86::BI__builtin_ia32_vpopcntw_512: {
13683 llvm::Type *ResultType = ConvertType(E->getType());
13684 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
13685 return Builder.CreateCall(F, Ops);
13686 }
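  // cvtmask2* sign-extends each mask bit to a full element, e.g. a 0b0101
  // mask over v4i32 becomes <-1, 0, -1, 0>.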
13687 case X86::BI__builtin_ia32_cvtmask2b128:
13688 case X86::BI__builtin_ia32_cvtmask2b256:
13689 case X86::BI__builtin_ia32_cvtmask2b512:
13690 case X86::BI__builtin_ia32_cvtmask2w128:
13691 case X86::BI__builtin_ia32_cvtmask2w256:
13692 case X86::BI__builtin_ia32_cvtmask2w512:
13693 case X86::BI__builtin_ia32_cvtmask2d128:
13694 case X86::BI__builtin_ia32_cvtmask2d256:
13695 case X86::BI__builtin_ia32_cvtmask2d512:
13696 case X86::BI__builtin_ia32_cvtmask2q128:
13697 case X86::BI__builtin_ia32_cvtmask2q256:
13698 case X86::BI__builtin_ia32_cvtmask2q512:
13699 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
13700
13701 case X86::BI__builtin_ia32_cvtb2mask128:
13702 case X86::BI__builtin_ia32_cvtb2mask256:
13703 case X86::BI__builtin_ia32_cvtb2mask512:
13704 case X86::BI__builtin_ia32_cvtw2mask128:
13705 case X86::BI__builtin_ia32_cvtw2mask256:
13706 case X86::BI__builtin_ia32_cvtw2mask512:
13707 case X86::BI__builtin_ia32_cvtd2mask128:
13708 case X86::BI__builtin_ia32_cvtd2mask256:
13709 case X86::BI__builtin_ia32_cvtd2mask512:
13710 case X86::BI__builtin_ia32_cvtq2mask128:
13711 case X86::BI__builtin_ia32_cvtq2mask256:
13712 case X86::BI__builtin_ia32_cvtq2mask512:
13713 return EmitX86ConvertToMask(*this, Ops[0]);
13714
13715 case X86::BI__builtin_ia32_cvtdq2ps512_mask:
13716 case X86::BI__builtin_ia32_cvtqq2ps512_mask:
13717 case X86::BI__builtin_ia32_cvtqq2pd512_mask:
13718 case X86::BI__builtin_ia32_vcvtw2ph512_mask:
13719 case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
13720 case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
13721 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
13722 case X86::BI__builtin_ia32_cvtudq2ps512_mask:
13723 case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
13724 case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
13725 case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
13726 case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
13727 case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
13728 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
13729
13730 case X86::BI__builtin_ia32_vfmaddss3:
13731 case X86::BI__builtin_ia32_vfmaddsd3:
13732 case X86::BI__builtin_ia32_vfmaddsh3_mask:
13733 case X86::BI__builtin_ia32_vfmaddss3_mask:
13734 case X86::BI__builtin_ia32_vfmaddsd3_mask:
13735 return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
13736 case X86::BI__builtin_ia32_vfmaddss:
13737 case X86::BI__builtin_ia32_vfmaddsd:
13738 return EmitScalarFMAExpr(*this, E, Ops,
13739 Constant::getNullValue(Ops[0]->getType()));
13740 case X86::BI__builtin_ia32_vfmaddsh3_maskz:
13741 case X86::BI__builtin_ia32_vfmaddss3_maskz:
13742 case X86::BI__builtin_ia32_vfmaddsd3_maskz:
13743 return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
13744 case X86::BI__builtin_ia32_vfmaddsh3_mask3:
13745 case X86::BI__builtin_ia32_vfmaddss3_mask3:
13746 case X86::BI__builtin_ia32_vfmaddsd3_mask3:
13747 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
13748 case X86::BI__builtin_ia32_vfmsubsh3_mask3:
13749 case X86::BI__builtin_ia32_vfmsubss3_mask3:
13750 case X86::BI__builtin_ia32_vfmsubsd3_mask3:
13751 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
13752 /*NegAcc*/ true);
13753 case X86::BI__builtin_ia32_vfmaddph:
13754 case X86::BI__builtin_ia32_vfmaddps:
13755 case X86::BI__builtin_ia32_vfmaddpd:
13756 case X86::BI__builtin_ia32_vfmaddph256:
13757 case X86::BI__builtin_ia32_vfmaddps256:
13758 case X86::BI__builtin_ia32_vfmaddpd256:
13759 case X86::BI__builtin_ia32_vfmaddph512_mask:
13760 case X86::BI__builtin_ia32_vfmaddph512_maskz:
13761 case X86::BI__builtin_ia32_vfmaddph512_mask3:
13762 case X86::BI__builtin_ia32_vfmaddps512_mask:
13763 case X86::BI__builtin_ia32_vfmaddps512_maskz:
13764 case X86::BI__builtin_ia32_vfmaddps512_mask3:
13765 case X86::BI__builtin_ia32_vfmsubps512_mask3:
13766 case X86::BI__builtin_ia32_vfmaddpd512_mask:
13767 case X86::BI__builtin_ia32_vfmaddpd512_maskz:
13768 case X86::BI__builtin_ia32_vfmaddpd512_mask3:
13769 case X86::BI__builtin_ia32_vfmsubpd512_mask3:
13770 case X86::BI__builtin_ia32_vfmsubph512_mask3:
13771 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
13772 case X86::BI__builtin_ia32_vfmaddsubph512_mask:
13773 case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
13774 case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
13775 case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
13776 case X86::BI__builtin_ia32_vfmaddsubps512_mask:
13777 case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
13778 case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
13779 case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
13780 case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
13781 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
13782 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
13783 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
13784 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
13785
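  // The aligned masked-store forms below derive their alignment from the
  // type of the builtin's second argument, unlike the unaligned forms
  // above, which hard-code Align(1).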
13786 case X86::BI__builtin_ia32_movdqa32store128_mask:
13787 case X86::BI__builtin_ia32_movdqa64store128_mask:
13788 case X86::BI__builtin_ia32_storeaps128_mask:
13789 case X86::BI__builtin_ia32_storeapd128_mask:
13790 case X86::BI__builtin_ia32_movdqa32store256_mask:
13791 case X86::BI__builtin_ia32_movdqa64store256_mask:
13792 case X86::BI__builtin_ia32_storeaps256_mask:
13793 case X86::BI__builtin_ia32_storeapd256_mask:
13794 case X86::BI__builtin_ia32_movdqa32store512_mask:
13795 case X86::BI__builtin_ia32_movdqa64store512_mask:
13796 case X86::BI__builtin_ia32_storeaps512_mask:
13797 case X86::BI__builtin_ia32_storeapd512_mask:
13798 return EmitX86MaskedStore(
13799 *this, Ops,
13800 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
13801
13802 case X86::BI__builtin_ia32_loadups128_mask:
13803 case X86::BI__builtin_ia32_loadups256_mask:
13804 case X86::BI__builtin_ia32_loadups512_mask:
13805 case X86::BI__builtin_ia32_loadupd128_mask:
13806 case X86::BI__builtin_ia32_loadupd256_mask:
13807 case X86::BI__builtin_ia32_loadupd512_mask:
13808 case X86::BI__builtin_ia32_loaddquqi128_mask:
13809 case X86::BI__builtin_ia32_loaddquqi256_mask:
13810 case X86::BI__builtin_ia32_loaddquqi512_mask:
13811 case X86::BI__builtin_ia32_loaddquhi128_mask:
13812 case X86::BI__builtin_ia32_loaddquhi256_mask:
13813 case X86::BI__builtin_ia32_loaddquhi512_mask:
13814 case X86::BI__builtin_ia32_loaddqusi128_mask:
13815 case X86::BI__builtin_ia32_loaddqusi256_mask:
13816 case X86::BI__builtin_ia32_loaddqusi512_mask:
13817 case X86::BI__builtin_ia32_loaddqudi128_mask:
13818 case X86::BI__builtin_ia32_loaddqudi256_mask:
13819 case X86::BI__builtin_ia32_loaddqudi512_mask:
13820 return EmitX86MaskedLoad(*this, Ops, Align(1));
13821
13822 case X86::BI__builtin_ia32_loadsh128_mask:
13823 case X86::BI__builtin_ia32_loadss128_mask:
13824 case X86::BI__builtin_ia32_loadsd128_mask:
13825 return EmitX86MaskedLoad(*this, Ops, Align(1));
13826
13827 case X86::BI__builtin_ia32_loadaps128_mask:
13828 case X86::BI__builtin_ia32_loadaps256_mask:
13829 case X86::BI__builtin_ia32_loadaps512_mask:
13830 case X86::BI__builtin_ia32_loadapd128_mask:
13831 case X86::BI__builtin_ia32_loadapd256_mask:
13832 case X86::BI__builtin_ia32_loadapd512_mask:
13833 case X86::BI__builtin_ia32_movdqa32load128_mask:
13834 case X86::BI__builtin_ia32_movdqa32load256_mask:
13835 case X86::BI__builtin_ia32_movdqa32load512_mask:
13836 case X86::BI__builtin_ia32_movdqa64load128_mask:
13837 case X86::BI__builtin_ia32_movdqa64load256_mask:
13838 case X86::BI__builtin_ia32_movdqa64load512_mask:
13839 return EmitX86MaskedLoad(
13840 *this, Ops,
13841 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
13842
13843 case X86::BI__builtin_ia32_expandloaddf128_mask:
13844 case X86::BI__builtin_ia32_expandloaddf256_mask:
13845 case X86::BI__builtin_ia32_expandloaddf512_mask:
13846 case X86::BI__builtin_ia32_expandloadsf128_mask:
13847 case X86::BI__builtin_ia32_expandloadsf256_mask:
13848 case X86::BI__builtin_ia32_expandloadsf512_mask:
13849 case X86::BI__builtin_ia32_expandloaddi128_mask:
13850 case X86::BI__builtin_ia32_expandloaddi256_mask:
13851 case X86::BI__builtin_ia32_expandloaddi512_mask:
13852 case X86::BI__builtin_ia32_expandloadsi128_mask:
13853 case X86::BI__builtin_ia32_expandloadsi256_mask:
13854 case X86::BI__builtin_ia32_expandloadsi512_mask:
13855 case X86::BI__builtin_ia32_expandloadhi128_mask:
13856 case X86::BI__builtin_ia32_expandloadhi256_mask:
13857 case X86::BI__builtin_ia32_expandloadhi512_mask:
13858 case X86::BI__builtin_ia32_expandloadqi128_mask:
13859 case X86::BI__builtin_ia32_expandloadqi256_mask:
13860 case X86::BI__builtin_ia32_expandloadqi512_mask:
13861 return EmitX86ExpandLoad(*this, Ops);
13862
13863 case X86::BI__builtin_ia32_compressstoredf128_mask:
13864 case X86::BI__builtin_ia32_compressstoredf256_mask:
13865 case X86::BI__builtin_ia32_compressstoredf512_mask:
13866 case X86::BI__builtin_ia32_compressstoresf128_mask:
13867 case X86::BI__builtin_ia32_compressstoresf256_mask:
13868 case X86::BI__builtin_ia32_compressstoresf512_mask:
13869 case X86::BI__builtin_ia32_compressstoredi128_mask:
13870 case X86::BI__builtin_ia32_compressstoredi256_mask:
13871 case X86::BI__builtin_ia32_compressstoredi512_mask:
13872 case X86::BI__builtin_ia32_compressstoresi128_mask:
13873 case X86::BI__builtin_ia32_compressstoresi256_mask:
13874 case X86::BI__builtin_ia32_compressstoresi512_mask:
13875 case X86::BI__builtin_ia32_compressstorehi128_mask:
13876 case X86::BI__builtin_ia32_compressstorehi256_mask:
13877 case X86::BI__builtin_ia32_compressstorehi512_mask:
13878 case X86::BI__builtin_ia32_compressstoreqi128_mask:
13879 case X86::BI__builtin_ia32_compressstoreqi256_mask:
13880 case X86::BI__builtin_ia32_compressstoreqi512_mask:
13881 return EmitX86CompressStore(*this, Ops);
13882
13883 case X86::BI__builtin_ia32_expanddf128_mask:
13884 case X86::BI__builtin_ia32_expanddf256_mask:
13885 case X86::BI__builtin_ia32_expanddf512_mask:
13886 case X86::BI__builtin_ia32_expandsf128_mask:
13887 case X86::BI__builtin_ia32_expandsf256_mask:
13888 case X86::BI__builtin_ia32_expandsf512_mask:
13889 case X86::BI__builtin_ia32_expanddi128_mask:
13890 case X86::BI__builtin_ia32_expanddi256_mask:
13891 case X86::BI__builtin_ia32_expanddi512_mask:
13892 case X86::BI__builtin_ia32_expandsi128_mask:
13893 case X86::BI__builtin_ia32_expandsi256_mask:
13894 case X86::BI__builtin_ia32_expandsi512_mask:
13895 case X86::BI__builtin_ia32_expandhi128_mask:
13896 case X86::BI__builtin_ia32_expandhi256_mask:
13897 case X86::BI__builtin_ia32_expandhi512_mask:
13898 case X86::BI__builtin_ia32_expandqi128_mask:
13899 case X86::BI__builtin_ia32_expandqi256_mask:
13900 case X86::BI__builtin_ia32_expandqi512_mask:
13901 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
13902
13903 case X86::BI__builtin_ia32_compressdf128_mask:
13904 case X86::BI__builtin_ia32_compressdf256_mask:
13905 case X86::BI__builtin_ia32_compressdf512_mask:
13906 case X86::BI__builtin_ia32_compresssf128_mask:
13907 case X86::BI__builtin_ia32_compresssf256_mask:
13908 case X86::BI__builtin_ia32_compresssf512_mask:
13909 case X86::BI__builtin_ia32_compressdi128_mask:
13910 case X86::BI__builtin_ia32_compressdi256_mask:
13911 case X86::BI__builtin_ia32_compressdi512_mask:
13912 case X86::BI__builtin_ia32_compresssi128_mask:
13913 case X86::BI__builtin_ia32_compresssi256_mask:
13914 case X86::BI__builtin_ia32_compresssi512_mask:
13915 case X86::BI__builtin_ia32_compresshi128_mask:
13916 case X86::BI__builtin_ia32_compresshi256_mask:
13917 case X86::BI__builtin_ia32_compresshi512_mask:
13918 case X86::BI__builtin_ia32_compressqi128_mask:
13919 case X86::BI__builtin_ia32_compressqi256_mask:
13920 case X86::BI__builtin_ia32_compressqi512_mask:
13921 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
13922
13923 case X86::BI__builtin_ia32_gather3div2df:
13924 case X86::BI__builtin_ia32_gather3div2di:
13925 case X86::BI__builtin_ia32_gather3div4df:
13926 case X86::BI__builtin_ia32_gather3div4di:
13927 case X86::BI__builtin_ia32_gather3div4sf:
13928 case X86::BI__builtin_ia32_gather3div4si:
13929 case X86::BI__builtin_ia32_gather3div8sf:
13930 case X86::BI__builtin_ia32_gather3div8si:
13931 case X86::BI__builtin_ia32_gather3siv2df:
13932 case X86::BI__builtin_ia32_gather3siv2di:
13933 case X86::BI__builtin_ia32_gather3siv4df:
13934 case X86::BI__builtin_ia32_gather3siv4di:
13935 case X86::BI__builtin_ia32_gather3siv4sf:
13936 case X86::BI__builtin_ia32_gather3siv4si:
13937 case X86::BI__builtin_ia32_gather3siv8sf:
13938 case X86::BI__builtin_ia32_gather3siv8si:
13939 case X86::BI__builtin_ia32_gathersiv8df:
13940 case X86::BI__builtin_ia32_gathersiv16sf:
13941 case X86::BI__builtin_ia32_gatherdiv8df:
13942 case X86::BI__builtin_ia32_gatherdiv16sf:
13943 case X86::BI__builtin_ia32_gathersiv8di:
13944 case X86::BI__builtin_ia32_gathersiv16si:
13945 case X86::BI__builtin_ia32_gatherdiv8di:
13946 case X86::BI__builtin_ia32_gatherdiv16si: {
13947 Intrinsic::ID IID;
13948 switch (BuiltinID) {
13949 default: llvm_unreachable("Unexpected builtin");
13950 case X86::BI__builtin_ia32_gather3div2df:
13951 IID = Intrinsic::x86_avx512_mask_gather3div2_df;
13952 break;
13953 case X86::BI__builtin_ia32_gather3div2di:
13954 IID = Intrinsic::x86_avx512_mask_gather3div2_di;
13955 break;
13956 case X86::BI__builtin_ia32_gather3div4df:
13957 IID = Intrinsic::x86_avx512_mask_gather3div4_df;
13958 break;
13959 case X86::BI__builtin_ia32_gather3div4di:
13960 IID = Intrinsic::x86_avx512_mask_gather3div4_di;
13961 break;
13962 case X86::BI__builtin_ia32_gather3div4sf:
13963 IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
13964 break;
13965 case X86::BI__builtin_ia32_gather3div4si:
13966 IID = Intrinsic::x86_avx512_mask_gather3div4_si;
13967 break;
13968 case X86::BI__builtin_ia32_gather3div8sf:
13969 IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
13970 break;
13971 case X86::BI__builtin_ia32_gather3div8si:
13972 IID = Intrinsic::x86_avx512_mask_gather3div8_si;
13973 break;
13974 case X86::BI__builtin_ia32_gather3siv2df:
13975 IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
13976 break;
13977 case X86::BI__builtin_ia32_gather3siv2di:
13978 IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
13979 break;
13980 case X86::BI__builtin_ia32_gather3siv4df:
13981 IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
13982 break;
13983 case X86::BI__builtin_ia32_gather3siv4di:
13984 IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
13985 break;
13986 case X86::BI__builtin_ia32_gather3siv4sf:
13987 IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
13988 break;
13989 case X86::BI__builtin_ia32_gather3siv4si:
13990 IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
13991 break;
13992 case X86::BI__builtin_ia32_gather3siv8sf:
13993 IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
13994 break;
13995 case X86::BI__builtin_ia32_gather3siv8si:
13996 IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
13997 break;
13998 case X86::BI__builtin_ia32_gathersiv8df:
13999 IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
14000 break;
14001 case X86::BI__builtin_ia32_gathersiv16sf:
14002 IID = Intrinsic::x86_avx512_mask_gather_dps_512;
14003 break;
14004 case X86::BI__builtin_ia32_gatherdiv8df:
14005 IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
14006 break;
14007 case X86::BI__builtin_ia32_gatherdiv16sf:
14008 IID = Intrinsic::x86_avx512_mask_gather_qps_512;
14009 break;
14010 case X86::BI__builtin_ia32_gathersiv8di:
14011 IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
14012 break;
14013 case X86::BI__builtin_ia32_gathersiv16si:
14014 IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
14015 break;
14016 case X86::BI__builtin_ia32_gatherdiv8di:
14017 IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
14018 break;
14019 case X86::BI__builtin_ia32_gatherdiv16si:
14020 IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
14021 break;
14022 }
14023
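    // Only min(result elements, index elements) lanes are live, e.g.
    // gather3div4sf pairs a v4f32 result with a v2i64 index vector, so
    // the mask is narrowed to 2 elements.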
14024 unsigned MinElts = std::min(
14025 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
14026 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
14027 Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
14028 Function *Intr = CGM.getIntrinsic(IID);
14029 return Builder.CreateCall(Intr, Ops);
14030 }
14031
14032 case X86::BI__builtin_ia32_scattersiv8df:
14033 case X86::BI__builtin_ia32_scattersiv16sf:
14034 case X86::BI__builtin_ia32_scatterdiv8df:
14035 case X86::BI__builtin_ia32_scatterdiv16sf:
14036 case X86::BI__builtin_ia32_scattersiv8di:
14037 case X86::BI__builtin_ia32_scattersiv16si:
14038 case X86::BI__builtin_ia32_scatterdiv8di:
14039 case X86::BI__builtin_ia32_scatterdiv16si:
14040 case X86::BI__builtin_ia32_scatterdiv2df:
14041 case X86::BI__builtin_ia32_scatterdiv2di:
14042 case X86::BI__builtin_ia32_scatterdiv4df:
14043 case X86::BI__builtin_ia32_scatterdiv4di:
14044 case X86::BI__builtin_ia32_scatterdiv4sf:
14045 case X86::BI__builtin_ia32_scatterdiv4si:
14046 case X86::BI__builtin_ia32_scatterdiv8sf:
14047 case X86::BI__builtin_ia32_scatterdiv8si:
14048 case X86::BI__builtin_ia32_scattersiv2df:
14049 case X86::BI__builtin_ia32_scattersiv2di:
14050 case X86::BI__builtin_ia32_scattersiv4df:
14051 case X86::BI__builtin_ia32_scattersiv4di:
14052 case X86::BI__builtin_ia32_scattersiv4sf:
14053 case X86::BI__builtin_ia32_scattersiv4si:
14054 case X86::BI__builtin_ia32_scattersiv8sf:
14055 case X86::BI__builtin_ia32_scattersiv8si: {
14056 Intrinsic::ID IID;
14057 switch (BuiltinID) {
14058 default: llvm_unreachable("Unexpected builtin");
14059 case X86::BI__builtin_ia32_scattersiv8df:
14060 IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
14061 break;
14062 case X86::BI__builtin_ia32_scattersiv16sf:
14063 IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
14064 break;
14065 case X86::BI__builtin_ia32_scatterdiv8df:
14066 IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
14067 break;
14068 case X86::BI__builtin_ia32_scatterdiv16sf:
14069 IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
14070 break;
14071 case X86::BI__builtin_ia32_scattersiv8di:
14072 IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
14073 break;
14074 case X86::BI__builtin_ia32_scattersiv16si:
14075 IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
14076 break;
14077 case X86::BI__builtin_ia32_scatterdiv8di:
14078 IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
14079 break;
14080 case X86::BI__builtin_ia32_scatterdiv16si:
14081 IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
14082 break;
14083 case X86::BI__builtin_ia32_scatterdiv2df:
14084 IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
14085 break;
14086 case X86::BI__builtin_ia32_scatterdiv2di:
14087 IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
14088 break;
14089 case X86::BI__builtin_ia32_scatterdiv4df:
14090 IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
14091 break;
14092 case X86::BI__builtin_ia32_scatterdiv4di:
14093 IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
14094 break;
14095 case X86::BI__builtin_ia32_scatterdiv4sf:
14096 IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
14097 break;
14098 case X86::BI__builtin_ia32_scatterdiv4si:
14099 IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
14100 break;
14101 case X86::BI__builtin_ia32_scatterdiv8sf:
14102 IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
14103 break;
14104 case X86::BI__builtin_ia32_scatterdiv8si:
14105 IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
14106 break;
14107 case X86::BI__builtin_ia32_scattersiv2df:
14108 IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
14109 break;
14110 case X86::BI__builtin_ia32_scattersiv2di:
14111 IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
14112 break;
14113 case X86::BI__builtin_ia32_scattersiv4df:
14114 IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
14115 break;
14116 case X86::BI__builtin_ia32_scattersiv4di:
14117 IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
14118 break;
14119 case X86::BI__builtin_ia32_scattersiv4sf:
14120 IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
14121 break;
14122 case X86::BI__builtin_ia32_scattersiv4si:
14123 IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
14124 break;
14125 case X86::BI__builtin_ia32_scattersiv8sf:
14126 IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
14127 break;
14128 case X86::BI__builtin_ia32_scattersiv8si:
14129 IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
14130 break;
14131 }
14132
14133 unsigned MinElts = std::min(
14134 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
14135 cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
14136 Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
14137 Function *Intr = CGM.getIntrinsic(IID);
14138 return Builder.CreateCall(Intr, Ops);
14139 }
14140
14141 case X86::BI__builtin_ia32_vextractf128_pd256:
14142 case X86::BI__builtin_ia32_vextractf128_ps256:
14143 case X86::BI__builtin_ia32_vextractf128_si256:
14144 case X86::BI__builtin_ia32_extract128i256:
14145 case X86::BI__builtin_ia32_extractf64x4_mask:
14146 case X86::BI__builtin_ia32_extractf32x4_mask:
14147 case X86::BI__builtin_ia32_extracti64x4_mask:
14148 case X86::BI__builtin_ia32_extracti32x4_mask:
14149 case X86::BI__builtin_ia32_extractf32x8_mask:
14150 case X86::BI__builtin_ia32_extracti32x8_mask:
14151 case X86::BI__builtin_ia32_extractf32x4_256_mask:
14152 case X86::BI__builtin_ia32_extracti32x4_256_mask:
14153 case X86::BI__builtin_ia32_extractf64x2_256_mask:
14154 case X86::BI__builtin_ia32_extracti64x2_256_mask:
14155 case X86::BI__builtin_ia32_extractf64x2_512_mask:
14156 case X86::BI__builtin_ia32_extracti64x2_512_mask: {
14157 auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
14158 unsigned NumElts = DstTy->getNumElements();
14159 unsigned SrcNumElts =
14160 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14161 unsigned SubVectors = SrcNumElts / NumElts;
14162 unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
14163 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
14164 Index &= SubVectors - 1; // Remove any extra bits.
14165 Index *= NumElts;
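    // e.g. extracti32x4 from a 512-bit source with an immediate of 2:
    // NumElts = 4, SubVectors = 4, so the mask below is <8, 9, 10, 11>.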
14166
14167 int Indices[16];
14168 for (unsigned i = 0; i != NumElts; ++i)
14169 Indices[i] = i + Index;
14170
14171 Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
14172 "extract");
14173
14174 if (Ops.size() == 4)
14175 Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
14176
14177 return Res;
14178 }
14179 case X86::BI__builtin_ia32_vinsertf128_pd256:
14180 case X86::BI__builtin_ia32_vinsertf128_ps256:
14181 case X86::BI__builtin_ia32_vinsertf128_si256:
14182 case X86::BI__builtin_ia32_insert128i256:
14183 case X86::BI__builtin_ia32_insertf64x4:
14184 case X86::BI__builtin_ia32_insertf32x4:
14185 case X86::BI__builtin_ia32_inserti64x4:
14186 case X86::BI__builtin_ia32_inserti32x4:
14187 case X86::BI__builtin_ia32_insertf32x8:
14188 case X86::BI__builtin_ia32_inserti32x8:
14189 case X86::BI__builtin_ia32_insertf32x4_256:
14190 case X86::BI__builtin_ia32_inserti32x4_256:
14191 case X86::BI__builtin_ia32_insertf64x2_256:
14192 case X86::BI__builtin_ia32_inserti64x2_256:
14193 case X86::BI__builtin_ia32_insertf64x2_512:
14194 case X86::BI__builtin_ia32_inserti64x2_512: {
14195 unsigned DstNumElts =
14196 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14197 unsigned SrcNumElts =
14198 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
14199 unsigned SubVectors = DstNumElts / SrcNumElts;
14200 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
14201 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
14202 Index &= SubVectors - 1; // Remove any extra bits.
14203 Index *= SrcNumElts;
14204
14205 int Indices[16];
14206 for (unsigned i = 0; i != DstNumElts; ++i)
14207 Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
14208
14209 Value *Op1 = Builder.CreateShuffleVector(
14210 Ops[1], ArrayRef(Indices, DstNumElts), "widen");
14211
14212 for (unsigned i = 0; i != DstNumElts; ++i) {
14213 if (i >= Index && i < (Index + SrcNumElts))
14214 Indices[i] = (i - Index) + DstNumElts;
14215 else
14216 Indices[i] = i;
14217 }
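    // e.g. insertf32x4 into a 256-bit vector at Index = 1 produces
    // <0, 1, 2, 3, 8, 9, 10, 11>: the low lane of Ops[0] is kept and the
    // high lane comes from the widened source.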
14218
14219 return Builder.CreateShuffleVector(Ops[0], Op1,
14220 ArrayRef(Indices, DstNumElts), "insert");
14221 }
14222 case X86::BI__builtin_ia32_pmovqd512_mask:
14223 case X86::BI__builtin_ia32_pmovwb512_mask: {
14224 Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
14225 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
14226 }
14227 case X86::BI__builtin_ia32_pmovdb512_mask:
14228 case X86::BI__builtin_ia32_pmovdw512_mask:
14229 case X86::BI__builtin_ia32_pmovqw512_mask: {
14230 if (const auto *C = dyn_cast<Constant>(Ops[2]))
14231 if (C->isAllOnesValue())
14232 return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
14233
14234 Intrinsic::ID IID;
14235 switch (BuiltinID) {
14236 default: llvm_unreachable("Unsupported intrinsic!");
14237 case X86::BI__builtin_ia32_pmovdb512_mask:
14238 IID = Intrinsic::x86_avx512_mask_pmov_db_512;
14239 break;
14240 case X86::BI__builtin_ia32_pmovdw512_mask:
14241 IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
14242 break;
14243 case X86::BI__builtin_ia32_pmovqw512_mask:
14244 IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
14245 break;
14246 }
14247
14248 Function *Intr = CGM.getIntrinsic(IID);
14249 return Builder.CreateCall(Intr, Ops);
14250 }
14251 case X86::BI__builtin_ia32_pblendw128:
14252 case X86::BI__builtin_ia32_blendpd:
14253 case X86::BI__builtin_ia32_blendps:
14254 case X86::BI__builtin_ia32_blendpd256:
14255 case X86::BI__builtin_ia32_blendps256:
14256 case X86::BI__builtin_ia32_pblendw256:
14257 case X86::BI__builtin_ia32_pblendd128:
14258 case X86::BI__builtin_ia32_pblendd256: {
14259 unsigned NumElts =
14260 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14261 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
14262
14263 int Indices[16];
14264 // If there are more than 8 elements, the immediate is used twice so make
14265 // sure we handle that.
14266 for (unsigned i = 0; i != NumElts; ++i)
14267 Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
14268
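    // e.g. blendps with Imm = 0x5 on v4f32 yields <4, 1, 6, 3>: set bits
    // pick from Ops[1], clear bits keep Ops[0].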
14269 return Builder.CreateShuffleVector(Ops[0], Ops[1],
14270 ArrayRef(Indices, NumElts), "blend");
14271 }
14272 case X86::BI__builtin_ia32_pshuflw:
14273 case X86::BI__builtin_ia32_pshuflw256:
14274 case X86::BI__builtin_ia32_pshuflw512: {
14275 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
14276 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
14277 unsigned NumElts = Ty->getNumElements();
14278
14279 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
14280 Imm = (Imm & 0xff) * 0x01010101;
14281
14282 int Indices[32];
14283 for (unsigned l = 0; l != NumElts; l += 8) {
14284 for (unsigned i = 0; i != 4; ++i) {
14285 Indices[l + i] = l + (Imm & 3);
14286 Imm >>= 2;
14287 }
14288 for (unsigned i = 4; i != 8; ++i)
14289 Indices[l + i] = l + i;
14290 }
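    // e.g. Imm = 0x1B reverses the low four words of each lane:
    // <3, 2, 1, 0, 4, 5, 6, 7> for the 128-bit form.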
14291
14292 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
14293 "pshuflw");
14294 }
14295 case X86::BI__builtin_ia32_pshufhw:
14296 case X86::BI__builtin_ia32_pshufhw256:
14297 case X86::BI__builtin_ia32_pshufhw512: {
14298 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
14299 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
14300 unsigned NumElts = Ty->getNumElements();
14301
14302 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
14303 Imm = (Imm & 0xff) * 0x01010101;
14304
14305 int Indices[32];
14306 for (unsigned l = 0; l != NumElts; l += 8) {
14307 for (unsigned i = 0; i != 4; ++i)
14308 Indices[l + i] = l + i;
14309 for (unsigned i = 4; i != 8; ++i) {
14310 Indices[l + i] = l + 4 + (Imm & 3);
14311 Imm >>= 2;
14312 }
14313 }
14314
14315 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
14316 "pshufhw");
14317 }
14318 case X86::BI__builtin_ia32_pshufd:
14319 case X86::BI__builtin_ia32_pshufd256:
14320 case X86::BI__builtin_ia32_pshufd512:
14321 case X86::BI__builtin_ia32_vpermilpd:
14322 case X86::BI__builtin_ia32_vpermilps:
14323 case X86::BI__builtin_ia32_vpermilpd256:
14324 case X86::BI__builtin_ia32_vpermilps256:
14325 case X86::BI__builtin_ia32_vpermilpd512:
14326 case X86::BI__builtin_ia32_vpermilps512: {
14327 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
14328 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
14329 unsigned NumElts = Ty->getNumElements();
14330 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
14331 unsigned NumLaneElts = NumElts / NumLanes;
14332
14333 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
14334 Imm = (Imm & 0xff) * 0x01010101;
14335
14336 int Indices[16];
14337 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
14338 for (unsigned i = 0; i != NumLaneElts; ++i) {
14339 Indices[i + l] = (Imm % NumLaneElts) + l;
14340 Imm /= NumLaneElts;
14341 }
14342 }
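    // e.g. vpermilps with Imm = 0xB1 swaps adjacent element pairs in
    // every 128-bit lane: <1, 0, 3, 2> per lane.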
14343
14344 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
14345 "permil");
14346 }
14347 case X86::BI__builtin_ia32_shufpd:
14348 case X86::BI__builtin_ia32_shufpd256:
14349 case X86::BI__builtin_ia32_shufpd512:
14350 case X86::BI__builtin_ia32_shufps:
14351 case X86::BI__builtin_ia32_shufps256:
14352 case X86::BI__builtin_ia32_shufps512: {
14353 uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
14354 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
14355 unsigned NumElts = Ty->getNumElements();
14356 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
14357 unsigned NumLaneElts = NumElts / NumLanes;
14358
14359 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
14360 Imm = (Imm & 0xff) * 0x01010101;
14361
14362 int Indices[16];
14363 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
14364 for (unsigned i = 0; i != NumLaneElts; ++i) {
14365 unsigned Index = Imm % NumLaneElts;
14366 Imm /= NumLaneElts;
14367 if (i >= (NumLaneElts / 2))
14368 Index += NumElts;
14369 Indices[l + i] = l + Index;
14370 }
14371 }
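    // e.g. shufps with Imm = 0x44 yields <0, 1, 4, 5>: the low half of
    // each result lane comes from Ops[0], the high half from Ops[1].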
14372
14373 return Builder.CreateShuffleVector(Ops[0], Ops[1],
14374 ArrayRef(Indices, NumElts), "shufp");
14375 }
14376 case X86::BI__builtin_ia32_permdi256:
14377 case X86::BI__builtin_ia32_permdf256:
14378 case X86::BI__builtin_ia32_permdi512:
14379 case X86::BI__builtin_ia32_permdf512: {
14380 unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
14381 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
14382 unsigned NumElts = Ty->getNumElements();
14383
14384 // These intrinsics operate on 256-bit lanes of four 64-bit elements.
14385 int Indices[8];
14386 for (unsigned l = 0; l != NumElts; l += 4)
14387 for (unsigned i = 0; i != 4; ++i)
14388 Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
14389
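    // e.g. Imm = 0x1B reverses the four 64-bit elements of each 256-bit
    // chunk: <3, 2, 1, 0>.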
14390 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
14391 "perm");
14392 }
14393 case X86::BI__builtin_ia32_palignr128:
14394 case X86::BI__builtin_ia32_palignr256:
14395 case X86::BI__builtin_ia32_palignr512: {
14396 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
14397
14398 unsigned NumElts =
14399 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14400 assert(NumElts % 16 == 0);
14401
14402 // If palignr is shifting the pair of vectors more than the size of two
14403 // lanes, emit zero.
14404 if (ShiftVal >= 32)
14405 return llvm::Constant::getNullValue(ConvertType(E->getType()));
14406
14407 // If palignr is shifting the pair of input vectors more than one lane,
14408 // but less than two lanes, convert to shifting in zeroes.
14409 if (ShiftVal > 16) {
14410 ShiftVal -= 16;
14411 Ops[1] = Ops[0];
14412 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
14413 }
14414
14415 int Indices[64];
14416 // 256/512-bit palignr operates on 128-bit lanes, so we handle each lane separately.
14417 for (unsigned l = 0; l != NumElts; l += 16) {
14418 for (unsigned i = 0; i != 16; ++i) {
14419 unsigned Idx = ShiftVal + i;
14420 if (Idx >= 16)
14421 Idx += NumElts - 16; // End of lane, switch operand.
14422 Indices[l + i] = Idx + l;
14423 }
14424 }
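    // e.g. the 128-bit form with ShiftVal = 4 selects bytes 4..15 of
    // Ops[1] followed by bytes 0..3 of Ops[0], i.e. the concatenation
    // Ops[0]:Ops[1] shifted right by 4 bytes.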
14425
14426 return Builder.CreateShuffleVector(Ops[1], Ops[0],
14427 ArrayRef(Indices, NumElts), "palignr");
14428 }
14429 case X86::BI__builtin_ia32_alignd128:
14430 case X86::BI__builtin_ia32_alignd256:
14431 case X86::BI__builtin_ia32_alignd512:
14432 case X86::BI__builtin_ia32_alignq128:
14433 case X86::BI__builtin_ia32_alignq256:
14434 case X86::BI__builtin_ia32_alignq512: {
14435 unsigned NumElts =
14436 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14437 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
14438
14439 // Mask the shift amount to the width of the vector.
14440 ShiftVal &= NumElts - 1;
14441
14442 int Indices[16];
14443 for (unsigned i = 0; i != NumElts; ++i)
14444 Indices[i] = i + ShiftVal;
14445
14446 return Builder.CreateShuffleVector(Ops[1], Ops[0],
14447 ArrayRef(Indices, NumElts), "valign");
14448 }
14449 case X86::BI__builtin_ia32_shuf_f32x4_256:
14450 case X86::BI__builtin_ia32_shuf_f64x2_256:
14451 case X86::BI__builtin_ia32_shuf_i32x4_256:
14452 case X86::BI__builtin_ia32_shuf_i64x2_256:
14453 case X86::BI__builtin_ia32_shuf_f32x4:
14454 case X86::BI__builtin_ia32_shuf_f64x2:
14455 case X86::BI__builtin_ia32_shuf_i32x4:
14456 case X86::BI__builtin_ia32_shuf_i64x2: {
14457 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
14458 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
14459 unsigned NumElts = Ty->getNumElements();
14460 unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
14461 unsigned NumLaneElts = NumElts / NumLanes;
14462
14463 int Indices[16];
14464 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
14465 unsigned Index = (Imm % NumLanes) * NumLaneElts;
14466 Imm /= NumLanes; // Discard the bits we just used.
14467 if (l >= (NumElts / 2))
14468 Index += NumElts; // Switch to other source.
14469 for (unsigned i = 0; i != NumLaneElts; ++i) {
14470 Indices[l + i] = Index + i;
14471 }
14472 }
14473
14474 return Builder.CreateShuffleVector(Ops[0], Ops[1],
14475 ArrayRef(Indices, NumElts), "shuf");
14476 }
14477
14478 case X86::BI__builtin_ia32_vperm2f128_pd256:
14479 case X86::BI__builtin_ia32_vperm2f128_ps256:
14480 case X86::BI__builtin_ia32_vperm2f128_si256:
14481 case X86::BI__builtin_ia32_permti256: {
14482 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
14483 unsigned NumElts =
14484 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14485
14486 // This takes a very simple approach since there are two lanes and a
14487 // shuffle can have 2 inputs. So we reserve the first input for the first
14488 // lane and the second input for the second lane. This may result in
14489 // duplicate sources, but this can be dealt with in the backend.
14490
14491 Value *OutOps[2];
14492 int Indices[8];
14493 for (unsigned l = 0; l != 2; ++l) {
14494 // Determine the source for this lane.
14495 if (Imm & (1 << ((l * 4) + 3)))
14496 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
14497 else if (Imm & (1 << ((l * 4) + 1)))
14498 OutOps[l] = Ops[1];
14499 else
14500 OutOps[l] = Ops[0];
14501
14502 for (unsigned i = 0; i != NumElts/2; ++i) {
14503 // Start with the ith element of the source for this lane.
14504 unsigned Idx = (l * NumElts) + i;
14505 // If bit 0 of the immediate half is set, switch to the high half of
14506 // the source.
14507 if (Imm & (1 << (l * 4)))
14508 Idx += NumElts/2;
14509 Indices[(l * (NumElts/2)) + i] = Idx;
14510 }
14511 }
14512
14513 return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
14514 ArrayRef(Indices, NumElts), "vperm");
14515 }
14516
14517 case X86::BI__builtin_ia32_pslldqi128_byteshift:
14518 case X86::BI__builtin_ia32_pslldqi256_byteshift:
14519 case X86::BI__builtin_ia32_pslldqi512_byteshift: {
14520 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
14521 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
14522 // Builtin type is vXi64 so multiply by 8 to get bytes.
14523 unsigned NumElts = ResultType->getNumElements() * 8;
14524
14525 // If pslldq is shifting the vector more than 15 bytes, emit zero.
14526 if (ShiftVal >= 16)
14527 return llvm::Constant::getNullValue(ResultType);
14528
14529 int Indices[64];
14530 // 256/512-bit pslldq operates on 128-bit lanes, so we handle each lane separately.
14531 for (unsigned l = 0; l != NumElts; l += 16) {
14532 for (unsigned i = 0; i != 16; ++i) {
14533 unsigned Idx = NumElts + i - ShiftVal;
14534 if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
14535 Indices[l + i] = Idx + l;
14536 }
14537 }
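    // e.g. ShiftVal = 3 on the 128-bit form gives <13, 14, ..., 28>:
    // three zero bytes from the null vector, then bytes 0..12 of the
    // source.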
14538
14539 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
14540 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
14541 Value *Zero = llvm::Constant::getNullValue(VecTy);
14542 Value *SV = Builder.CreateShuffleVector(
14543 Zero, Cast, ArrayRef(Indices, NumElts), "pslldq");
14544 return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
14545 }
14546 case X86::BI__builtin_ia32_psrldqi128_byteshift:
14547 case X86::BI__builtin_ia32_psrldqi256_byteshift:
14548 case X86::BI__builtin_ia32_psrldqi512_byteshift: {
14549 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
14550 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
14551 // Builtin type is vXi64 so multiply by 8 to get bytes.
14552 unsigned NumElts = ResultType->getNumElements() * 8;
14553
14554 // If psrldq is shifting the vector more than 15 bytes, emit zero.
14555 if (ShiftVal >= 16)
14556 return llvm::Constant::getNullValue(ResultType);
14557
14558 int Indices[64];
14559 // 256/512-bit psrldq operates on 128-bit lanes, so we handle each lane separately.
14560 for (unsigned l = 0; l != NumElts; l += 16) {
14561 for (unsigned i = 0; i != 16; ++i) {
14562 unsigned Idx = i + ShiftVal;
14563 if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
14564 Indices[l + i] = Idx + l;
14565 }
14566 }
14567
14568 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
14569 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
14570 Value *Zero = llvm::Constant::getNullValue(VecTy);
14571 Value *SV = Builder.CreateShuffleVector(
14572 Cast, Zero, ArrayRef(Indices, NumElts), "psrldq");
14573 return Builder.CreateBitCast(SV, ResultType, "cast");
14574 }
14575 case X86::BI__builtin_ia32_kshiftliqi:
14576 case X86::BI__builtin_ia32_kshiftlihi:
14577 case X86::BI__builtin_ia32_kshiftlisi:
14578 case X86::BI__builtin_ia32_kshiftlidi: {
14579 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
14580 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
14581
14582 if (ShiftVal >= NumElts)
14583 return llvm::Constant::getNullValue(Ops[0]->getType());
14584
14585 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
14586
14587 int Indices[64];
14588 for (unsigned i = 0; i != NumElts; ++i)
14589 Indices[i] = NumElts + i - ShiftVal;
14590
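    // e.g. an 8-bit mask shifted left by 2 uses <6, 7, 8, ..., 13>,
    // pulling two zero bits in at the bottom.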
14591 Value *Zero = llvm::Constant::getNullValue(In->getType());
14592 Value *SV = Builder.CreateShuffleVector(
14593 Zero, In, ArrayRef(Indices, NumElts), "kshiftl");
14594 return Builder.CreateBitCast(SV, Ops[0]->getType());
14595 }
14596 case X86::BI__builtin_ia32_kshiftriqi:
14597 case X86::BI__builtin_ia32_kshiftrihi:
14598 case X86::BI__builtin_ia32_kshiftrisi:
14599 case X86::BI__builtin_ia32_kshiftridi: {
14600 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
14601 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
14602
14603 if (ShiftVal >= NumElts)
14604 return llvm::Constant::getNullValue(Ops[0]->getType());
14605
14606 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
14607
14608 int Indices[64];
14609 for (unsigned i = 0; i != NumElts; ++i)
14610 Indices[i] = i + ShiftVal;
14611
14612 Value *Zero = llvm::Constant::getNullValue(In->getType());
14613 Value *SV = Builder.CreateShuffleVector(
14614 In, Zero, ArrayRef(Indices, NumElts), "kshiftr");
14615 return Builder.CreateBitCast(SV, Ops[0]->getType());
14616 }
14617 case X86::BI__builtin_ia32_movnti:
14618 case X86::BI__builtin_ia32_movnti64:
14619 case X86::BI__builtin_ia32_movntsd:
14620 case X86::BI__builtin_ia32_movntss: {
14621 llvm::MDNode *Node = llvm::MDNode::get(
14622 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
14623
14624 Value *Ptr = Ops[0];
14625 Value *Src = Ops[1];
14626
14627 // Extract the 0th element of the source vector.
14628 if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
14629 BuiltinID == X86::BI__builtin_ia32_movntss)
14630 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
14631
14632 // Convert the type of the pointer to a pointer to the stored type.
14633 Value *BC = Builder.CreateBitCast(
14634 Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
14635
14636 // Unaligned nontemporal store of the scalar value.
14637 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
14638 SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);
14639 SI->setAlignment(llvm::Align(1));
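    // The result is roughly: store i32 %src, ptr %dst, align 1,
    // !nontemporal !0 (with the element type varying by builtin).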
14640 return SI;
14641 }
14642 // Rotate is a special case of funnel shift: the first two args are the same.
14643 case X86::BI__builtin_ia32_vprotb:
14644 case X86::BI__builtin_ia32_vprotw:
14645 case X86::BI__builtin_ia32_vprotd:
14646 case X86::BI__builtin_ia32_vprotq:
14647 case X86::BI__builtin_ia32_vprotbi:
14648 case X86::BI__builtin_ia32_vprotwi:
14649 case X86::BI__builtin_ia32_vprotdi:
14650 case X86::BI__builtin_ia32_vprotqi:
14651 case X86::BI__builtin_ia32_prold128:
14652 case X86::BI__builtin_ia32_prold256:
14653 case X86::BI__builtin_ia32_prold512:
14654 case X86::BI__builtin_ia32_prolq128:
14655 case X86::BI__builtin_ia32_prolq256:
14656 case X86::BI__builtin_ia32_prolq512:
14657 case X86::BI__builtin_ia32_prolvd128:
14658 case X86::BI__builtin_ia32_prolvd256:
14659 case X86::BI__builtin_ia32_prolvd512:
14660 case X86::BI__builtin_ia32_prolvq128:
14661 case X86::BI__builtin_ia32_prolvq256:
14662 case X86::BI__builtin_ia32_prolvq512:
14663 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
14664 case X86::BI__builtin_ia32_prord128:
14665 case X86::BI__builtin_ia32_prord256:
14666 case X86::BI__builtin_ia32_prord512:
14667 case X86::BI__builtin_ia32_prorq128:
14668 case X86::BI__builtin_ia32_prorq256:
14669 case X86::BI__builtin_ia32_prorq512:
14670 case X86::BI__builtin_ia32_prorvd128:
14671 case X86::BI__builtin_ia32_prorvd256:
14672 case X86::BI__builtin_ia32_prorvd512:
14673 case X86::BI__builtin_ia32_prorvq128:
14674 case X86::BI__builtin_ia32_prorvq256:
14675 case X86::BI__builtin_ia32_prorvq512:
14676 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
14677 case X86::BI__builtin_ia32_selectb_128:
14678 case X86::BI__builtin_ia32_selectb_256:
14679 case X86::BI__builtin_ia32_selectb_512:
14680 case X86::BI__builtin_ia32_selectw_128:
14681 case X86::BI__builtin_ia32_selectw_256:
14682 case X86::BI__builtin_ia32_selectw_512:
14683 case X86::BI__builtin_ia32_selectd_128:
14684 case X86::BI__builtin_ia32_selectd_256:
14685 case X86::BI__builtin_ia32_selectd_512:
14686 case X86::BI__builtin_ia32_selectq_128:
14687 case X86::BI__builtin_ia32_selectq_256:
14688 case X86::BI__builtin_ia32_selectq_512:
14689 case X86::BI__builtin_ia32_selectph_128:
14690 case X86::BI__builtin_ia32_selectph_256:
14691 case X86::BI__builtin_ia32_selectph_512:
14692 case X86::BI__builtin_ia32_selectpbf_128:
14693 case X86::BI__builtin_ia32_selectpbf_256:
14694 case X86::BI__builtin_ia32_selectpbf_512:
14695 case X86::BI__builtin_ia32_selectps_128:
14696 case X86::BI__builtin_ia32_selectps_256:
14697 case X86::BI__builtin_ia32_selectps_512:
14698 case X86::BI__builtin_ia32_selectpd_128:
14699 case X86::BI__builtin_ia32_selectpd_256:
14700 case X86::BI__builtin_ia32_selectpd_512:
14701 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
14702 case X86::BI__builtin_ia32_selectsh_128:
14703 case X86::BI__builtin_ia32_selectsbf_128:
14704 case X86::BI__builtin_ia32_selectss_128:
14705 case X86::BI__builtin_ia32_selectsd_128: {
14706 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
14707 Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
14708 A = EmitX86ScalarSelect(*this, Ops[0], A, B);
14709 return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
14710 }
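  // For the [u]cmp builtins below, the 3-bit immediate selects the
  // predicate per the VPCMP encoding: 0 = eq, 1 = lt, 2 = le, 3 = false,
  // 4 = ne, 5 = ge, 6 = gt, 7 = true.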
14711 case X86::BI__builtin_ia32_cmpb128_mask:
14712 case X86::BI__builtin_ia32_cmpb256_mask:
14713 case X86::BI__builtin_ia32_cmpb512_mask:
14714 case X86::BI__builtin_ia32_cmpw128_mask:
14715 case X86::BI__builtin_ia32_cmpw256_mask:
14716 case X86::BI__builtin_ia32_cmpw512_mask:
14717 case X86::BI__builtin_ia32_cmpd128_mask:
14718 case X86::BI__builtin_ia32_cmpd256_mask:
14719 case X86::BI__builtin_ia32_cmpd512_mask:
14720 case X86::BI__builtin_ia32_cmpq128_mask:
14721 case X86::BI__builtin_ia32_cmpq256_mask:
14722 case X86::BI__builtin_ia32_cmpq512_mask: {
14723 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
14724 return EmitX86MaskedCompare(*this, CC, true, Ops);
14725 }
14726 case X86::BI__builtin_ia32_ucmpb128_mask:
14727 case X86::BI__builtin_ia32_ucmpb256_mask:
14728 case X86::BI__builtin_ia32_ucmpb512_mask:
14729 case X86::BI__builtin_ia32_ucmpw128_mask:
14730 case X86::BI__builtin_ia32_ucmpw256_mask:
14731 case X86::BI__builtin_ia32_ucmpw512_mask:
14732 case X86::BI__builtin_ia32_ucmpd128_mask:
14733 case X86::BI__builtin_ia32_ucmpd256_mask:
14734 case X86::BI__builtin_ia32_ucmpd512_mask:
14735 case X86::BI__builtin_ia32_ucmpq128_mask:
14736 case X86::BI__builtin_ia32_ucmpq256_mask:
14737 case X86::BI__builtin_ia32_ucmpq512_mask: {
14738 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
14739 return EmitX86MaskedCompare(*this, CC, false, Ops);
14740 }
14741 case X86::BI__builtin_ia32_vpcomb:
14742 case X86::BI__builtin_ia32_vpcomw:
14743 case X86::BI__builtin_ia32_vpcomd:
14744 case X86::BI__builtin_ia32_vpcomq:
14745 return EmitX86vpcom(*this, Ops, true);
14746 case X86::BI__builtin_ia32_vpcomub:
14747 case X86::BI__builtin_ia32_vpcomuw:
14748 case X86::BI__builtin_ia32_vpcomud:
14749 case X86::BI__builtin_ia32_vpcomuq:
14750 return EmitX86vpcom(*this, Ops, false);
14751
14752 case X86::BI__builtin_ia32_kortestcqi:
14753 case X86::BI__builtin_ia32_kortestchi:
14754 case X86::BI__builtin_ia32_kortestcsi:
14755 case X86::BI__builtin_ia32_kortestcdi: {
14756 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
14757 Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
14758 Value *Cmp = Builder.CreateICmpEQ(Or, C);
14759 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
14760 }
14761 case X86::BI__builtin_ia32_kortestzqi:
14762 case X86::BI__builtin_ia32_kortestzhi:
14763 case X86::BI__builtin_ia32_kortestzsi:
14764 case X86::BI__builtin_ia32_kortestzdi: {
14765 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
14766 Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
14767 Value *Cmp = Builder.CreateICmpEQ(Or, C);
14768 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
14769 }
14770
14771 case X86::BI__builtin_ia32_ktestcqi:
14772 case X86::BI__builtin_ia32_ktestzqi:
14773 case X86::BI__builtin_ia32_ktestchi:
14774 case X86::BI__builtin_ia32_ktestzhi:
14775 case X86::BI__builtin_ia32_ktestcsi:
14776 case X86::BI__builtin_ia32_ktestzsi:
14777 case X86::BI__builtin_ia32_ktestcdi:
14778 case X86::BI__builtin_ia32_ktestzdi: {
14779 Intrinsic::ID IID;
14780 switch (BuiltinID) {
14781 default: llvm_unreachable("Unsupported intrinsic!");
14782 case X86::BI__builtin_ia32_ktestcqi:
14783 IID = Intrinsic::x86_avx512_ktestc_b;
14784 break;
14785 case X86::BI__builtin_ia32_ktestzqi:
14786 IID = Intrinsic::x86_avx512_ktestz_b;
14787 break;
14788 case X86::BI__builtin_ia32_ktestchi:
14789 IID = Intrinsic::x86_avx512_ktestc_w;
14790 break;
14791 case X86::BI__builtin_ia32_ktestzhi:
14792 IID = Intrinsic::x86_avx512_ktestz_w;
14793 break;
14794 case X86::BI__builtin_ia32_ktestcsi:
14795 IID = Intrinsic::x86_avx512_ktestc_d;
14796 break;
14797 case X86::BI__builtin_ia32_ktestzsi:
14798 IID = Intrinsic::x86_avx512_ktestz_d;
14799 break;
14800 case X86::BI__builtin_ia32_ktestcdi:
14801 IID = Intrinsic::x86_avx512_ktestc_q;
14802 break;
14803 case X86::BI__builtin_ia32_ktestzdi:
14804 IID = Intrinsic::x86_avx512_ktestz_q;
14805 break;
14806 }
14807
14808 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
14809 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
14810 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
14811 Function *Intr = CGM.getIntrinsic(IID);
14812 return Builder.CreateCall(Intr, {LHS, RHS});
14813 }
14814
14815 case X86::BI__builtin_ia32_kaddqi:
14816 case X86::BI__builtin_ia32_kaddhi:
14817 case X86::BI__builtin_ia32_kaddsi:
14818 case X86::BI__builtin_ia32_kadddi: {
14819 Intrinsic::ID IID;
14820 switch (BuiltinID) {
14821 default: llvm_unreachable("Unsupported intrinsic!");
14822 case X86::BI__builtin_ia32_kaddqi:
14823 IID = Intrinsic::x86_avx512_kadd_b;
14824 break;
14825 case X86::BI__builtin_ia32_kaddhi:
14826 IID = Intrinsic::x86_avx512_kadd_w;
14827 break;
14828 case X86::BI__builtin_ia32_kaddsi:
14829 IID = Intrinsic::x86_avx512_kadd_d;
14830 break;
14831 case X86::BI__builtin_ia32_kadddi:
14832 IID = Intrinsic::x86_avx512_kadd_q;
14833 break;
14834 }
14835
14836 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
14837 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
14838 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
14839 Function *Intr = CGM.getIntrinsic(IID);
14840 Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
14841 return Builder.CreateBitCast(Res, Ops[0]->getType());
14842 }
14843 case X86::BI__builtin_ia32_kandqi:
14844 case X86::BI__builtin_ia32_kandhi:
14845 case X86::BI__builtin_ia32_kandsi:
14846 case X86::BI__builtin_ia32_kanddi:
14847 return EmitX86MaskLogic(*this, Instruction::And, Ops);
14848 case X86::BI__builtin_ia32_kandnqi:
14849 case X86::BI__builtin_ia32_kandnhi:
14850 case X86::BI__builtin_ia32_kandnsi:
14851 case X86::BI__builtin_ia32_kandndi:
14852 return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
14853 case X86::BI__builtin_ia32_korqi:
14854 case X86::BI__builtin_ia32_korhi:
14855 case X86::BI__builtin_ia32_korsi:
14856 case X86::BI__builtin_ia32_kordi:
14857 return EmitX86MaskLogic(*this, Instruction::Or, Ops);
14858 case X86::BI__builtin_ia32_kxnorqi:
14859 case X86::BI__builtin_ia32_kxnorhi:
14860 case X86::BI__builtin_ia32_kxnorsi:
14861 case X86::BI__builtin_ia32_kxnordi:
14862 return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
14863 case X86::BI__builtin_ia32_kxorqi:
14864 case X86::BI__builtin_ia32_kxorhi:
14865 case X86::BI__builtin_ia32_kxorsi:
14866 case X86::BI__builtin_ia32_kxordi:
14867 return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
14868 case X86::BI__builtin_ia32_knotqi:
14869 case X86::BI__builtin_ia32_knothi:
14870 case X86::BI__builtin_ia32_knotsi:
14871 case X86::BI__builtin_ia32_knotdi: {
14872 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
14873 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
14874 return Builder.CreateBitCast(Builder.CreateNot(Res),
14875 Ops[0]->getType());
14876 }
14877 case X86::BI__builtin_ia32_kmovb:
14878 case X86::BI__builtin_ia32_kmovw:
14879 case X86::BI__builtin_ia32_kmovd:
14880 case X86::BI__builtin_ia32_kmovq: {
14881 // Bitcast to vXi1 type and then back to integer. This gets the mask
14882 // register type into the IR, but might be optimized out depending on
14883 // what's around it.
14884 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
14885 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
14886 return Builder.CreateBitCast(Res, Ops[0]->getType());
14887 }
14888
14889 case X86::BI__builtin_ia32_kunpckdi:
14890 case X86::BI__builtin_ia32_kunpcksi:
14891 case X86::BI__builtin_ia32_kunpckhi: {
14892 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
14893 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
14894 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
14895 int Indices[64];
14896 for (unsigned i = 0; i != NumElts; ++i)
14897 Indices[i] = i;
14898
14899 // First extract half of each vector. This gives better codegen than
14900 // doing it in a single shuffle.
14901 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
14902 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
14903 // Concat the vectors.
14904 // NOTE: Operands are swapped to match the intrinsic definition.
14905 Value *Res =
14906 Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
14907 return Builder.CreateBitCast(Res, Ops[0]->getType());
14908 }
14909
14910 case X86::BI__builtin_ia32_vplzcntd_128:
14911 case X86::BI__builtin_ia32_vplzcntd_256:
14912 case X86::BI__builtin_ia32_vplzcntd_512:
14913 case X86::BI__builtin_ia32_vplzcntq_128:
14914 case X86::BI__builtin_ia32_vplzcntq_256:
14915 case X86::BI__builtin_ia32_vplzcntq_512: {
14916 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
14917 return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
14918 }
14919 case X86::BI__builtin_ia32_sqrtss:
14920 case X86::BI__builtin_ia32_sqrtsd: {
14921 Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
14922 Function *F;
14923 if (Builder.getIsFPConstrained()) {
14924 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
14925 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
14926 A->getType());
14927 A = Builder.CreateConstrainedFPCall(F, {A});
14928 } else {
14929 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
14930 A = Builder.CreateCall(F, {A});
14931 }
14932 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
14933 }
14934 case X86::BI__builtin_ia32_sqrtsh_round_mask:
14935 case X86::BI__builtin_ia32_sqrtsd_round_mask:
14936 case X86::BI__builtin_ia32_sqrtss_round_mask: {
14937 unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
14938 // Lower to a plain sqrt only if the rounding mode is 4
14939 // (_MM_FROUND_CUR_DIRECTION); otherwise keep the target intrinsic.
14940 if (CC != 4) {
14941 Intrinsic::ID IID;
14942
14943 switch (BuiltinID) {
14944 default:
14945 llvm_unreachable("Unsupported intrinsic!");
14946 case X86::BI__builtin_ia32_sqrtsh_round_mask:
14947 IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
14948 break;
14949 case X86::BI__builtin_ia32_sqrtsd_round_mask:
14950 IID = Intrinsic::x86_avx512_mask_sqrt_sd;
14951 break;
14952 case X86::BI__builtin_ia32_sqrtss_round_mask:
14953 IID = Intrinsic::x86_avx512_mask_sqrt_ss;
14954 break;
14955 }
14956 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
14957 }
14958 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
14959 Function *F;
14960 if (Builder.getIsFPConstrained()) {
14961 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
14962 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
14963 A->getType());
14964 A = Builder.CreateConstrainedFPCall(F, A);
14965 } else {
14966 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
14967 A = Builder.CreateCall(F, A);
14968 }
14969 Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
14970 A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
14971 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
14972 }
14973 case X86::BI__builtin_ia32_sqrtpd256:
14974 case X86::BI__builtin_ia32_sqrtpd:
14975 case X86::BI__builtin_ia32_sqrtps256:
14976 case X86::BI__builtin_ia32_sqrtps:
14977 case X86::BI__builtin_ia32_sqrtph256:
14978 case X86::BI__builtin_ia32_sqrtph:
14979 case X86::BI__builtin_ia32_sqrtph512:
14980 case X86::BI__builtin_ia32_sqrtps512:
14981 case X86::BI__builtin_ia32_sqrtpd512: {
14982 if (Ops.size() == 2) {
14983 unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
14984 // Lower to a plain sqrt only if the rounding mode is 4 (i.e.
14985 // CUR_DIRECTION); otherwise keep the target-specific intrinsic.
14986 if (CC != 4) {
14987 Intrinsic::ID IID;
14988
14989 switch (BuiltinID) {
14990 default:
14991 llvm_unreachable("Unsupported intrinsic!");
14992 case X86::BI__builtin_ia32_sqrtph512:
14993 IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
14994 break;
14995 case X86::BI__builtin_ia32_sqrtps512:
14996 IID = Intrinsic::x86_avx512_sqrt_ps_512;
14997 break;
14998 case X86::BI__builtin_ia32_sqrtpd512:
14999 IID = Intrinsic::x86_avx512_sqrt_pd_512;
15000 break;
15001 }
15002 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
15003 }
15004 }
15005 if (Builder.getIsFPConstrained()) {
15006 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15007 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15008 Ops[0]->getType());
15009 return Builder.CreateConstrainedFPCall(F, Ops[0]);
15010 } else {
15011 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
15012 return Builder.CreateCall(F, Ops[0]);
15013 }
15014 }
15015
15016 case X86::BI__builtin_ia32_pmuludq128:
15017 case X86::BI__builtin_ia32_pmuludq256:
15018 case X86::BI__builtin_ia32_pmuludq512:
15019 return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
15020
15021 case X86::BI__builtin_ia32_pmuldq128:
15022 case X86::BI__builtin_ia32_pmuldq256:
15023 case X86::BI__builtin_ia32_pmuldq512:
15024 return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
15025
15026 case X86::BI__builtin_ia32_pternlogd512_mask:
15027 case X86::BI__builtin_ia32_pternlogq512_mask:
15028 case X86::BI__builtin_ia32_pternlogd128_mask:
15029 case X86::BI__builtin_ia32_pternlogd256_mask:
15030 case X86::BI__builtin_ia32_pternlogq128_mask:
15031 case X86::BI__builtin_ia32_pternlogq256_mask:
15032 return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
15033
15034 case X86::BI__builtin_ia32_pternlogd512_maskz:
15035 case X86::BI__builtin_ia32_pternlogq512_maskz:
15036 case X86::BI__builtin_ia32_pternlogd128_maskz:
15037 case X86::BI__builtin_ia32_pternlogd256_maskz:
15038 case X86::BI__builtin_ia32_pternlogq128_maskz:
15039 case X86::BI__builtin_ia32_pternlogq256_maskz:
15040 return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
15041
15042 case X86::BI__builtin_ia32_vpshldd128:
15043 case X86::BI__builtin_ia32_vpshldd256:
15044 case X86::BI__builtin_ia32_vpshldd512:
15045 case X86::BI__builtin_ia32_vpshldq128:
15046 case X86::BI__builtin_ia32_vpshldq256:
15047 case X86::BI__builtin_ia32_vpshldq512:
15048 case X86::BI__builtin_ia32_vpshldw128:
15049 case X86::BI__builtin_ia32_vpshldw256:
15050 case X86::BI__builtin_ia32_vpshldw512:
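// VPSHLD concatenates each element pair (Ops[0]:Ops[1]) and shifts left
// by Ops[2], keeping the high half, i.e. a funnel shift
// fshl(Ops[0], Ops[1], Ops[2]).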
15051 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
15052
15053 case X86::BI__builtin_ia32_vpshrdd128:
15054 case X86::BI__builtin_ia32_vpshrdd256:
15055 case X86::BI__builtin_ia32_vpshrdd512:
15056 case X86::BI__builtin_ia32_vpshrdq128:
15057 case X86::BI__builtin_ia32_vpshrdq256:
15058 case X86::BI__builtin_ia32_vpshrdq512:
15059 case X86::BI__builtin_ia32_vpshrdw128:
15060 case X86::BI__builtin_ia32_vpshrdw256:
15061 case X86::BI__builtin_ia32_vpshrdw512:
15062 // Ops 0 and 1 are swapped.
15063 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
15064
15065 case X86::BI__builtin_ia32_vpshldvd128:
15066 case X86::BI__builtin_ia32_vpshldvd256:
15067 case X86::BI__builtin_ia32_vpshldvd512:
15068 case X86::BI__builtin_ia32_vpshldvq128:
15069 case X86::BI__builtin_ia32_vpshldvq256:
15070 case X86::BI__builtin_ia32_vpshldvq512:
15071 case X86::BI__builtin_ia32_vpshldvw128:
15072 case X86::BI__builtin_ia32_vpshldvw256:
15073 case X86::BI__builtin_ia32_vpshldvw512:
15074 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
15075
15076 case X86::BI__builtin_ia32_vpshrdvd128:
15077 case X86::BI__builtin_ia32_vpshrdvd256:
15078 case X86::BI__builtin_ia32_vpshrdvd512:
15079 case X86::BI__builtin_ia32_vpshrdvq128:
15080 case X86::BI__builtin_ia32_vpshrdvq256:
15081 case X86::BI__builtin_ia32_vpshrdvq512:
15082 case X86::BI__builtin_ia32_vpshrdvw128:
15083 case X86::BI__builtin_ia32_vpshrdvw256:
15084 case X86::BI__builtin_ia32_vpshrdvw512:
15085 // Ops 0 and 1 are swapped.
15086 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
15087
15088 // Reductions
15089 case X86::BI__builtin_ia32_reduce_fadd_pd512:
15090 case X86::BI__builtin_ia32_reduce_fadd_ps512:
15091 case X86::BI__builtin_ia32_reduce_fadd_ph512:
15092 case X86::BI__builtin_ia32_reduce_fadd_ph256:
15093 case X86::BI__builtin_ia32_reduce_fadd_ph128: {
15094 Function *F =
15095 CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
15096 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
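// llvm.vector.reduce.fadd is a strictly ordered reduction unless the
// reassoc fast-math flag is set; set it to permit the relaxed (tree-wise)
// reduction these builtins allow.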
15097 Builder.getFastMathFlags().setAllowReassoc();
15098 return Builder.CreateCall(F, {Ops[0], Ops[1]});
15099 }
15100 case X86::BI__builtin_ia32_reduce_fmul_pd512:
15101 case X86::BI__builtin_ia32_reduce_fmul_ps512:
15102 case X86::BI__builtin_ia32_reduce_fmul_ph512:
15103 case X86::BI__builtin_ia32_reduce_fmul_ph256:
15104 case X86::BI__builtin_ia32_reduce_fmul_ph128: {
15105 Function *F =
15106 CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
15107 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
15108 Builder.getFastMathFlags().setAllowReassoc();
15109 return Builder.CreateCall(F, {Ops[0], Ops[1]});
15110 }
15111 case X86::BI__builtin_ia32_reduce_fmax_pd512:
15112 case X86::BI__builtin_ia32_reduce_fmax_ps512:
15113 case X86::BI__builtin_ia32_reduce_fmax_ph512:
15114 case X86::BI__builtin_ia32_reduce_fmax_ph256:
15115 case X86::BI__builtin_ia32_reduce_fmax_ph128: {
15116 Function *F =
15117 CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
15118 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
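// Setting nnan here encodes the builtin's assumption of NaN-free inputs
// and lets llvm.vector.reduce.fmax lower without NaN-handling code.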
15119 Builder.getFastMathFlags().setNoNaNs();
15120 return Builder.CreateCall(F, {Ops[0]});
15121 }
15122 case X86::BI__builtin_ia32_reduce_fmin_pd512:
15123 case X86::BI__builtin_ia32_reduce_fmin_ps512:
15124 case X86::BI__builtin_ia32_reduce_fmin_ph512:
15125 case X86::BI__builtin_ia32_reduce_fmin_ph256:
15126 case X86::BI__builtin_ia32_reduce_fmin_ph128: {
15127 Function *F =
15128 CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
15129 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
15130 Builder.getFastMathFlags().setNoNaNs();
15131 return Builder.CreateCall(F, {Ops[0]});
15132 }
15133
15134 // 3DNow!
15135 case X86::BI__builtin_ia32_pswapdsf:
15136 case X86::BI__builtin_ia32_pswapdsi: {
15137 llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
15138 Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
15139 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
15140 return Builder.CreateCall(F, Ops, "pswapd");
15141 }
15142 case X86::BI__builtin_ia32_rdrand16_step:
15143 case X86::BI__builtin_ia32_rdrand32_step:
15144 case X86::BI__builtin_ia32_rdrand64_step:
15145 case X86::BI__builtin_ia32_rdseed16_step:
15146 case X86::BI__builtin_ia32_rdseed32_step:
15147 case X86::BI__builtin_ia32_rdseed64_step: {
15148 Intrinsic::ID ID;
15149 switch (BuiltinID) {
15150 default: llvm_unreachable("Unsupported intrinsic!");
15151 case X86::BI__builtin_ia32_rdrand16_step:
15152 ID = Intrinsic::x86_rdrand_16;
15153 break;
15154 case X86::BI__builtin_ia32_rdrand32_step:
15155 ID = Intrinsic::x86_rdrand_32;
15156 break;
15157 case X86::BI__builtin_ia32_rdrand64_step:
15158 ID = Intrinsic::x86_rdrand_64;
15159 break;
15160 case X86::BI__builtin_ia32_rdseed16_step:
15161 ID = Intrinsic::x86_rdseed_16;
15162 break;
15163 case X86::BI__builtin_ia32_rdseed32_step:
15164 ID = Intrinsic::x86_rdseed_32;
15165 break;
15166 case X86::BI__builtin_ia32_rdseed64_step:
15167 ID = Intrinsic::x86_rdseed_64;
15168 break;
15169 }
15170
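// Each *_step builtin stores the random value through its pointer operand
// and returns the carry flag: 1 if a valid value was produced, 0 otherwise.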
15171 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
15172 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
15173 Ops[0]);
15174 return Builder.CreateExtractValue(Call, 1);
15175 }
15176 case X86::BI__builtin_ia32_addcarryx_u32:
15177 case X86::BI__builtin_ia32_addcarryx_u64:
15178 case X86::BI__builtin_ia32_subborrow_u32:
15179 case X86::BI__builtin_ia32_subborrow_u64: {
15180 Intrinsic::ID IID;
15181 switch (BuiltinID) {
15182 default: llvm_unreachable("Unsupported intrinsic!");
15183 case X86::BI__builtin_ia32_addcarryx_u32:
15184 IID = Intrinsic::x86_addcarry_32;
15185 break;
15186 case X86::BI__builtin_ia32_addcarryx_u64:
15187 IID = Intrinsic::x86_addcarry_64;
15188 break;
15189 case X86::BI__builtin_ia32_subborrow_u32:
15190 IID = Intrinsic::x86_subborrow_32;
15191 break;
15192 case X86::BI__builtin_ia32_subborrow_u64:
15193 IID = Intrinsic::x86_subborrow_64;
15194 break;
15195 }
15196
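// The intrinsic returns { carry-out, result }; store the arithmetic result
// through the out-pointer Ops[3] and return the carry-out flag.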
15197 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
15198 { Ops[0], Ops[1], Ops[2] });
15199 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
15200 Ops[3]);
15201 return Builder.CreateExtractValue(Call, 0);
15202 }
15203
15204 case X86::BI__builtin_ia32_fpclassps128_mask:
15205 case X86::BI__builtin_ia32_fpclassps256_mask:
15206 case X86::BI__builtin_ia32_fpclassps512_mask:
15207 case X86::BI__builtin_ia32_fpclassph128_mask:
15208 case X86::BI__builtin_ia32_fpclassph256_mask:
15209 case X86::BI__builtin_ia32_fpclassph512_mask:
15210 case X86::BI__builtin_ia32_fpclasspd128_mask:
15211 case X86::BI__builtin_ia32_fpclasspd256_mask:
15212 case X86::BI__builtin_ia32_fpclasspd512_mask: {
15213 unsigned NumElts =
15214 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15215 Value *MaskIn = Ops[2];
15216 Ops.erase(&Ops[2]);
15217
15218 Intrinsic::ID ID;
15219 switch (BuiltinID) {
15220 default: llvm_unreachable("Unsupported intrinsic!");
15221 case X86::BI__builtin_ia32_fpclassph128_mask:
15222 ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
15223 break;
15224 case X86::BI__builtin_ia32_fpclassph256_mask:
15225 ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
15226 break;
15227 case X86::BI__builtin_ia32_fpclassph512_mask:
15228 ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
15229 break;
15230 case X86::BI__builtin_ia32_fpclassps128_mask:
15231 ID = Intrinsic::x86_avx512_fpclass_ps_128;
15232 break;
15233 case X86::BI__builtin_ia32_fpclassps256_mask:
15234 ID = Intrinsic::x86_avx512_fpclass_ps_256;
15235 break;
15236 case X86::BI__builtin_ia32_fpclassps512_mask:
15237 ID = Intrinsic::x86_avx512_fpclass_ps_512;
15238 break;
15239 case X86::BI__builtin_ia32_fpclasspd128_mask:
15240 ID = Intrinsic::x86_avx512_fpclass_pd_128;
15241 break;
15242 case X86::BI__builtin_ia32_fpclasspd256_mask:
15243 ID = Intrinsic::x86_avx512_fpclass_pd_256;
15244 break;
15245 case X86::BI__builtin_ia32_fpclasspd512_mask:
15246 ID = Intrinsic::x86_avx512_fpclass_pd_512;
15247 break;
15248 }
15249
15250 Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
15251 return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
15252 }
15253
15254 case X86::BI__builtin_ia32_vp2intersect_q_512:
15255 case X86::BI__builtin_ia32_vp2intersect_q_256:
15256 case X86::BI__builtin_ia32_vp2intersect_q_128:
15257 case X86::BI__builtin_ia32_vp2intersect_d_512:
15258 case X86::BI__builtin_ia32_vp2intersect_d_256:
15259 case X86::BI__builtin_ia32_vp2intersect_d_128: {
15260 unsigned NumElts =
15261 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15262 Intrinsic::ID ID;
15263
15264 switch (BuiltinID) {
15265 default: llvm_unreachable("Unsupported intrinsic!");
15266 case X86::BI__builtin_ia32_vp2intersect_q_512:
15267 ID = Intrinsic::x86_avx512_vp2intersect_q_512;
15268 break;
15269 case X86::BI__builtin_ia32_vp2intersect_q_256:
15270 ID = Intrinsic::x86_avx512_vp2intersect_q_256;
15271 break;
15272 case X86::BI__builtin_ia32_vp2intersect_q_128:
15273 ID = Intrinsic::x86_avx512_vp2intersect_q_128;
15274 break;
15275 case X86::BI__builtin_ia32_vp2intersect_d_512:
15276 ID = Intrinsic::x86_avx512_vp2intersect_d_512;
15277 break;
15278 case X86::BI__builtin_ia32_vp2intersect_d_256:
15279 ID = Intrinsic::x86_avx512_vp2intersect_d_256;
15280 break;
15281 case X86::BI__builtin_ia32_vp2intersect_d_128:
15282 ID = Intrinsic::x86_avx512_vp2intersect_d_128;
15283 break;
15284 }
15285
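// The intrinsic returns a pair of i1 mask vectors; convert each to the
// integer mask form and store them through the output pointers Ops[2]
// and Ops[3].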
15286 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
15287 Value *Result = Builder.CreateExtractValue(Call, 0);
15288 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
15289 Builder.CreateDefaultAlignedStore(Result, Ops[2]);
15290
15291 Result = Builder.CreateExtractValue(Call, 1);
15292 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
15293 return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
15294 }
15295
15296 case X86::BI__builtin_ia32_vpmultishiftqb128:
15297 case X86::BI__builtin_ia32_vpmultishiftqb256:
15298 case X86::BI__builtin_ia32_vpmultishiftqb512: {
15299 Intrinsic::ID ID;
15300 switch (BuiltinID) {
15301 default: llvm_unreachable("Unsupported intrinsic!");
15302 case X86::BI__builtin_ia32_vpmultishiftqb128:
15303 ID = Intrinsic::x86_avx512_pmultishift_qb_128;
15304 break;
15305 case X86::BI__builtin_ia32_vpmultishiftqb256:
15306 ID = Intrinsic::x86_avx512_pmultishift_qb_256;
15307 break;
15308 case X86::BI__builtin_ia32_vpmultishiftqb512:
15309 ID = Intrinsic::x86_avx512_pmultishift_qb_512;
15310 break;
15311 }
15312
15313 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
15314 }
15315
15316 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
15317 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
15318 case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
15319 unsigned NumElts =
15320 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15321 Value *MaskIn = Ops[2];
15322 Ops.erase(&Ops[2]);
15323
15324 Intrinsic::ID ID;
15325 switch (BuiltinID) {
15326 default: llvm_unreachable("Unsupported intrinsic!");
15327 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
15328 ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
15329 break;
15330 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
15331 ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
15332 break;
15333 case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
15334 ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
15335 break;
15336 }
15337
15338 Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
15339 return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
15340 }
15341
15342 // packed comparison intrinsics
15343 case X86::BI__builtin_ia32_cmpeqps:
15344 case X86::BI__builtin_ia32_cmpeqpd:
15345 return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
15346 case X86::BI__builtin_ia32_cmpltps:
15347 case X86::BI__builtin_ia32_cmpltpd:
15348 return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
15349 case X86::BI__builtin_ia32_cmpleps:
15350 case X86::BI__builtin_ia32_cmplepd:
15351 return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
15352 case X86::BI__builtin_ia32_cmpunordps:
15353 case X86::BI__builtin_ia32_cmpunordpd:
15354 return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
15355 case X86::BI__builtin_ia32_cmpneqps:
15356 case X86::BI__builtin_ia32_cmpneqpd:
15357 return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
15358 case X86::BI__builtin_ia32_cmpnltps:
15359 case X86::BI__builtin_ia32_cmpnltpd:
15360 return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
15361 case X86::BI__builtin_ia32_cmpnleps:
15362 case X86::BI__builtin_ia32_cmpnlepd:
15363 return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
15364 case X86::BI__builtin_ia32_cmpordps:
15365 case X86::BI__builtin_ia32_cmpordpd:
15366 return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
15367 case X86::BI__builtin_ia32_cmpph128_mask:
15368 case X86::BI__builtin_ia32_cmpph256_mask:
15369 case X86::BI__builtin_ia32_cmpph512_mask:
15370 case X86::BI__builtin_ia32_cmpps128_mask:
15371 case X86::BI__builtin_ia32_cmpps256_mask:
15372 case X86::BI__builtin_ia32_cmpps512_mask:
15373 case X86::BI__builtin_ia32_cmppd128_mask:
15374 case X86::BI__builtin_ia32_cmppd256_mask:
15375 case X86::BI__builtin_ia32_cmppd512_mask:
15376 IsMaskFCmp = true;
15377 [[fallthrough]];
15378 case X86::BI__builtin_ia32_cmpps:
15379 case X86::BI__builtin_ia32_cmpps256:
15380 case X86::BI__builtin_ia32_cmppd:
15381 case X86::BI__builtin_ia32_cmppd256: {
15382 // Lower vector comparisons to fcmp instructions, ignoring
15383 // the requested signaling behavior and
15384 // the requested rounding mode.
15385 // This is only possible if the fp-model is not strict and FENV_ACCESS is off.
15386
15387 // The third argument is the comparison condition, an integer in the
15388 // range [0, 31].
15389 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
15390
15391 // Lowering to IR fcmp instruction.
15392 // Ignoring requested signaling behaviour,
15393 // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
15394 FCmpInst::Predicate Pred;
15395 bool IsSignaling;
15396 // Predicates for 16-31 repeat the 0-15 predicates. Only the signaling
15397 // behavior is inverted. We'll handle that after the switch.
15398 switch (CC & 0xf) {
15399 case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break;
15400 case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break;
15401 case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break;
15402 case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break;
15403 case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break;
15404 case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break;
15405 case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break;
15406 case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break;
15407 case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break;
15408 case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break;
15409 case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break;
15410 case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
15411 case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break;
15412 case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break;
15413 case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break;
15414 case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break;
15415 default: llvm_unreachable("Unhandled CC");
15416 }
15417
15418 // Invert the signalling behavior for 16-31.
15419 if (CC & 0x10)
15420 IsSignaling = !IsSignaling;
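// For example, _CMP_GT_OS (0x0e) maps to a signaling FCMP_OGT, while
// _CMP_GT_OQ (0x1e) selects the same predicate with quiet behavior.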
15421
15422 // If the predicate is true or false and we're using constrained intrinsics,
15423 // we don't have a compare intrinsic we can use. Just use the legacy
15424 // X86-specific intrinsic.
15425 // Likewise, if the intrinsic is mask-enabled and we're using constrained
15426 // intrinsics, use the legacy X86-specific intrinsic.
15427 if (Builder.getIsFPConstrained() &&
15428 (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
15429 IsMaskFCmp)) {
15430
15431 Intrinsic::ID IID;
15432 switch (BuiltinID) {
15433 default: llvm_unreachable("Unexpected builtin");
15434 case X86::BI__builtin_ia32_cmpps:
15435 IID = Intrinsic::x86_sse_cmp_ps;
15436 break;
15437 case X86::BI__builtin_ia32_cmpps256:
15438 IID = Intrinsic::x86_avx_cmp_ps_256;
15439 break;
15440 case X86::BI__builtin_ia32_cmppd:
15441 IID = Intrinsic::x86_sse2_cmp_pd;
15442 break;
15443 case X86::BI__builtin_ia32_cmppd256:
15444 IID = Intrinsic::x86_avx_cmp_pd_256;
15445 break;
15446 case X86::BI__builtin_ia32_cmpps512_mask:
15447 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
15448 break;
15449 case X86::BI__builtin_ia32_cmppd512_mask:
15450 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
15451 break;
15452 case X86::BI__builtin_ia32_cmpps128_mask:
15453 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
15454 break;
15455 case X86::BI__builtin_ia32_cmpps256_mask:
15456 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
15457 break;
15458 case X86::BI__builtin_ia32_cmppd128_mask:
15459 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
15460 break;
15461 case X86::BI__builtin_ia32_cmppd256_mask:
15462 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
15463 break;
15464 }
15465
15466 Function *Intr = CGM.getIntrinsic(IID);
15467 if (IsMaskFCmp) {
15468 unsigned NumElts =
15469 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15470 Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
15471 Value *Cmp = Builder.CreateCall(Intr, Ops);
15472 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
15473 }
15474
15475 return Builder.CreateCall(Intr, Ops);
15476 }
15477
15478 // Builtins with the _mask suffix return an integer bitmask; the others
15479 // return a vector of integers of the same width as the input vectors.
15480 if (IsMaskFCmp) {
15481 // We ignore SAE if strict FP is disabled. We only keep precise
15482 // exception behavior under strict FP.
15483 // NOTE: If strict FP ever goes through here, a CGFPOptionsRAII
15484 // object will be required.
15485 unsigned NumElts =
15486 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15487 Value *Cmp;
15488 if (IsSignaling)
15489 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
15490 else
15491 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
15492 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
15493 }
15494
15495 return getVectorFCmpIR(Pred, IsSignaling);
15496 }
15497
15498 // SSE scalar comparison intrinsics
15499 case X86::BI__builtin_ia32_cmpeqss:
15500 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
15501 case X86::BI__builtin_ia32_cmpltss:
15502 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
15503 case X86::BI__builtin_ia32_cmpless:
15504 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
15505 case X86::BI__builtin_ia32_cmpunordss:
15506 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
15507 case X86::BI__builtin_ia32_cmpneqss:
15508 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
15509 case X86::BI__builtin_ia32_cmpnltss:
15510 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
15511 case X86::BI__builtin_ia32_cmpnless:
15512 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
15513 case X86::BI__builtin_ia32_cmpordss:
15514 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
15515 case X86::BI__builtin_ia32_cmpeqsd:
15516 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
15517 case X86::BI__builtin_ia32_cmpltsd:
15518 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
15519 case X86::BI__builtin_ia32_cmplesd:
15520 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
15521 case X86::BI__builtin_ia32_cmpunordsd:
15522 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
15523 case X86::BI__builtin_ia32_cmpneqsd:
15524 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
15525 case X86::BI__builtin_ia32_cmpnltsd:
15526 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
15527 case X86::BI__builtin_ia32_cmpnlesd:
15528 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
15529 case X86::BI__builtin_ia32_cmpordsd:
15530 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
15531
15532 // f16c half2float intrinsics
15533 case X86::BI__builtin_ia32_vcvtph2ps:
15534 case X86::BI__builtin_ia32_vcvtph2ps256:
15535 case X86::BI__builtin_ia32_vcvtph2ps_mask:
15536 case X86::BI__builtin_ia32_vcvtph2ps256_mask:
15537 case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
15538 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15539 return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
15540 }
15541
15542 // AVX512 bf16 intrinsics
15543 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
15544 Ops[2] = getMaskVecValue(
15545 *this, Ops[2],
15546 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
15547 Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
15548 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
15549 }
15550 case X86::BI__builtin_ia32_cvtsbf162ss_32:
15551 return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());
15552
15553 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
15554 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
15555 Intrinsic::ID IID;
15556 switch (BuiltinID) {
15557 default: llvm_unreachable("Unsupported intrinsic!");
15558 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
15559 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
15560 break;
15561 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
15562 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
15563 break;
15564 }
15565 Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
15566 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
15567 }
15568
15569 case X86::BI__cpuid:
15570 case X86::BI__cpuidex: {
15571 Value *FuncId = EmitScalarExpr(E->getArg(1));
15572 Value *SubFuncId = BuiltinID == X86::BI__cpuidex
15573 ? EmitScalarExpr(E->getArg(2))
15574 : llvm::ConstantInt::get(Int32Ty, 0);
15575
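// For example, __cpuid(info, 1) executes CPUID with EAX = 1 (and ECX = 0)
// and stores the resulting EAX, EBX, ECX, and EDX into info[0..3].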
15576 llvm::StructType *CpuidRetTy =
15577 llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
15578 llvm::FunctionType *FTy =
15579 llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);
15580
15581 StringRef Asm, Constraints;
15582 if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
15583 Asm = "cpuid";
15584 Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
15585 } else {
15586 // x86-64 uses %rbx as the base register, so preserve it.
15587 Asm = "xchgq %rbx, ${1:q}\n"
15588 "cpuid\n"
15589 "xchgq %rbx, ${1:q}";
15590 Constraints = "={ax},=r,={cx},={dx},0,2";
15591 }
15592
15593 llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
15594 /*hasSideEffects=*/false);
15595 Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
15596 Value *BasePtr = EmitScalarExpr(E->getArg(0));
15597 Value *Store = nullptr;
15598 for (unsigned i = 0; i < 4; i++) {
15599 Value *Extracted = Builder.CreateExtractValue(IACall, i);
15600 Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
15601 Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
15602 }
15603
15604 // Return the last store instruction to signal that we have emitted
15605 // the intrinsic.
15606 return Store;
15607 }
15608
15609 case X86::BI__emul:
15610 case X86::BI__emulu: {
15611 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
15612 bool isSigned = (BuiltinID == X86::BI__emul);
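// Widen both 32-bit operands to 64 bits so the full product is retained,
// e.g. __emul(0x40000000, 4) yields 0x100000000, which needs 64 bits.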
15613 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
15614 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
15615 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
15616 }
15617 case X86::BI__mulh:
15618 case X86::BI__umulh:
15619 case X86::BI_mul128:
15620 case X86::BI_umul128: {
15621 llvm::Type *ResType = ConvertType(E->getType());
15622 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
15623
15624 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
15625 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
15626 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
15627
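// Form the full 128-bit product and shift to recover its high 64 bits;
// __mulh/__umulh return only those, while _mul128/_umul128 also store them
// through the out-pointer and return the low 64 bits of the product.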
15628 Value *MulResult, *HigherBits;
15629 if (IsSigned) {
15630 MulResult = Builder.CreateNSWMul(LHS, RHS);
15631 HigherBits = Builder.CreateAShr(MulResult, 64);
15632 } else {
15633 MulResult = Builder.CreateNUWMul(LHS, RHS);
15634 HigherBits = Builder.CreateLShr(MulResult, 64);
15635 }
15636 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
15637
15638 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
15639 return HigherBits;
15640
15641 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
15642 Builder.CreateStore(HigherBits, HighBitsAddress);
15643 return Builder.CreateIntCast(MulResult, ResType, IsSigned);
15644 }
15645
15646 case X86::BI__faststorefence: {
15647 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
15648 llvm::SyncScope::System);
15649 }
15650 case X86::BI__shiftleft128:
15651 case X86::BI__shiftright128: {
15652 llvm::Function *F = CGM.getIntrinsic(
15653 BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
15654 Int64Ty);
15655 // Flip low/high ops and zero-extend amount to matching type.
15656 // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
15657 // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
15658 std::swap(Ops[0], Ops[1]);
15659 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
15660 return Builder.CreateCall(F, Ops);
15661 }
15662 case X86::BI_ReadWriteBarrier:
15663 case X86::BI_ReadBarrier:
15664 case X86::BI_WriteBarrier: {
15665 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
15666 llvm::SyncScope::SingleThread);
15667 }
15668
15669 case X86::BI_AddressOfReturnAddress: {
15670 Function *F =
15671 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
15672 return Builder.CreateCall(F);
15673 }
15674 case X86::BI__stosb: {
15675 // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
15676 // instruction, but it will create a memset that won't be optimized away.
15677 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
15678 }
15679 case X86::BI__ud2:
15680 // llvm.trap emits a ud2a instruction on x86.
15681 return EmitTrapCall(Intrinsic::trap);
15682 case X86::BI__int2c: {
15683 // This syscall signals a driver assertion failure in x86 NT kernels.
15684 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
15685 llvm::InlineAsm *IA =
15686 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
15687 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
15688 getLLVMContext(), llvm::AttributeList::FunctionIndex,
15689 llvm::Attribute::NoReturn);
15690 llvm::CallInst *CI = Builder.CreateCall(IA);
15691 CI->setAttributes(NoReturnAttr);
15692 return CI;
15693 }
15694 case X86::BI__readfsbyte:
15695 case X86::BI__readfsword:
15696 case X86::BI__readfsdword:
15697 case X86::BI__readfsqword: {
15698 llvm::Type *IntTy = ConvertType(E->getType());
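// LLVM's X86 backend reserves address space 257 for FS-relative accesses.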
15699 Value *Ptr =
15700 Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 257));
15701 LoadInst *Load = Builder.CreateAlignedLoad(
15702 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
15703 Load->setVolatile(true);
15704 return Load;
15705 }
15706 case X86::BI__readgsbyte:
15707 case X86::BI__readgsword:
15708 case X86::BI__readgsdword:
15709 case X86::BI__readgsqword: {
15710 llvm::Type *IntTy = ConvertType(E->getType());
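// Likewise, address space 256 is reserved for GS-relative accesses.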
15711 Value *Ptr =
15712 Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 256));
15713 LoadInst *Load = Builder.CreateAlignedLoad(
15714 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
15715 Load->setVolatile(true);
15716 return Load;
15717 }
15718 case X86::BI__builtin_ia32_encodekey128_u32: {
15719 Intrinsic::ID IID = Intrinsic::x86_encodekey128;
15720
15721 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
15722
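// Copy the three 128-bit chunks of the 384-bit key handle out through
// Ops[2], 16 bytes at a time; struct element 0 holds the builtin's
// return value.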
15723 for (int i = 0; i < 3; ++i) {
15724 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
15725 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
15726 Ptr = Builder.CreateBitCast(
15727 Ptr, llvm::PointerType::getUnqual(Extract->getType()));
15728 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
15729 }
15730
15731 return Builder.CreateExtractValue(Call, 0);
15732 }
15733 case X86::BI__builtin_ia32_encodekey256_u32: {
15734 Intrinsic::ID IID = Intrinsic::x86_encodekey256;
15735
15736 Value *Call =
15737 Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
15738
15739 for (int i = 0; i < 4; ++i) {
15740 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
15741 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
15742 Ptr = Builder.CreateBitCast(
15743 Ptr, llvm::PointerType::getUnqual(Extract->getType()));
15744 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
15745 }
15746
15747 return Builder.CreateExtractValue(Call, 0);
15748 }
15749 case X86::BI__builtin_ia32_aesenc128kl_u8:
15750 case X86::BI__builtin_ia32_aesdec128kl_u8:
15751 case X86::BI__builtin_ia32_aesenc256kl_u8:
15752 case X86::BI__builtin_ia32_aesdec256kl_u8: {
15753 Intrinsic::ID IID;
15754 StringRef BlockName;
15755 switch (BuiltinID) {
15756 default:
15757 llvm_unreachable("Unexpected builtin");
15758 case X86::BI__builtin_ia32_aesenc128kl_u8:
15759 IID = Intrinsic::x86_aesenc128kl;
15760 BlockName = "aesenc128kl";
15761 break;
15762 case X86::BI__builtin_ia32_aesdec128kl_u8:
15763 IID = Intrinsic::x86_aesdec128kl;
15764 BlockName = "aesdec128kl";
15765 break;
15766 case X86::BI__builtin_ia32_aesenc256kl_u8:
15767 IID = Intrinsic::x86_aesenc256kl;
15768 BlockName = "aesenc256kl";
15769 break;
15770 case X86::BI__builtin_ia32_aesdec256kl_u8:
15771 IID = Intrinsic::x86_aesdec256kl;
15772 BlockName = "aesdec256kl";
15773 break;
15774 }
15775
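// The intrinsic returns { success-flag, result }. Branch on the flag so
// the result is stored on success and zero on failure, then return the
// flag itself.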
15776 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
15777
15778 BasicBlock *NoError =
15779 createBasicBlock(BlockName + "_no_error", this->CurFn);
15780 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
15781 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
15782
15783 Value *Ret = Builder.CreateExtractValue(Call, 0);
15784 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
15785 Value *Out = Builder.CreateExtractValue(Call, 1);
15786 Builder.CreateCondBr(Succ, NoError, Error);
15787
15788 Builder.SetInsertPoint(NoError);
15789 Builder.CreateDefaultAlignedStore(Out, Ops[0]);
15790 Builder.CreateBr(End);
15791
15792 Builder.SetInsertPoint(Error);
15793 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
15794 Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
15795 Builder.CreateBr(End);
15796
15797 Builder.SetInsertPoint(End);
15798 return Builder.CreateExtractValue(Call, 0);
15799 }
15800 case X86::BI__builtin_ia32_aesencwide128kl_u8:
15801 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
15802 case X86::BI__builtin_ia32_aesencwide256kl_u8:
15803 case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
15804 Intrinsic::ID IID;
15805 StringRef BlockName;
15806 switch (BuiltinID) {
15807 case X86::BI__builtin_ia32_aesencwide128kl_u8:
15808 IID = Intrinsic::x86_aesencwide128kl;
15809 BlockName = "aesencwide128kl";
15810 break;
15811 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
15812 IID = Intrinsic::x86_aesdecwide128kl;
15813 BlockName = "aesdecwide128kl";
15814 break;
15815 case X86::BI__builtin_ia32_aesencwide256kl_u8:
15816 IID = Intrinsic::x86_aesencwide256kl;
15817 BlockName = "aesencwide256kl";
15818 break;
15819 case X86::BI__builtin_ia32_aesdecwide256kl_u8:
15820 IID = Intrinsic::x86_aesdecwide256kl;
15821 BlockName = "aesdecwide256kl";
15822 break;
15823 }
15824
15825 llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
15826 Value *InOps[9];
15827 InOps[0] = Ops[2];
15828 for (int i = 0; i != 8; ++i) {
15829 Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
15830 InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
15831 }
15832
15833 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
15834
15835 BasicBlock *NoError =
15836 createBasicBlock(BlockName + "_no_error", this->CurFn);
15837 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
15838 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
15839
15840 Value *Ret = Builder.CreateExtractValue(Call, 0);
15841 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
15842 Builder.CreateCondBr(Succ, NoError, Error);
15843
15844 Builder.SetInsertPoint(NoError);
15845 for (int i = 0; i != 8; ++i) {
15846 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
15847 Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
15848 Builder.CreateAlignedStore(Extract, Ptr, Align(16));
15849 }
15850 Builder.CreateBr(End);
15851
15852 Builder.SetInsertPoint(Error);
15853 for (int i = 0; i != 8; ++i) {
15854 Value *Out = Builder.CreateExtractValue(Call, i + 1);
15855 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
15856 Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
15857 Builder.CreateAlignedStore(Zero, Ptr, Align(16));
15858 }
15859 Builder.CreateBr(End);
15860
15861 Builder.SetInsertPoint(End);
15862 return Builder.CreateExtractValue(Call, 0);
15863 }
15864 case X86::BI__builtin_ia32_vfcmaddcph512_mask:
15865 IsConjFMA = true;
15866 [[fallthrough]];
15867 case X86::BI__builtin_ia32_vfmaddcph512_mask: {
15868 Intrinsic::ID IID = IsConjFMA
15869 ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
15870 : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
15871 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
15872 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
15873 }
15874 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
15875 IsConjFMA = true;
15876 [[fallthrough]];
15877 case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
15878 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
15879 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
15880 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
15881 Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
15882 return EmitX86Select(*this, And, Call, Ops[0]);
15883 }
15884 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
15885 IsConjFMA = true;
15886 [[fallthrough]];
15887 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
15888 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
15889 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
15890 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
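// The shuffle below selects element 0 from the FMA result and elements
// 1-3 from Ops[2], implementing the mask3 variant's pass-through of the
// upper elements.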
15891 static constexpr int Mask[] = {0, 5, 6, 7};
15892 return Builder.CreateShuffleVector(Call, Ops[2], Mask);
15893 }
15894 case X86::BI__builtin_ia32_prefetchi:
15895 return Builder.CreateCall(
15896 CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
15897 {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
15898 llvm::ConstantInt::get(Int32Ty, 0)});
15899 }
15900}
15901
15902Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
15903 const CallExpr *E) {
15904 // Do not evaluate the builtin's arguments inside the argument list of a
15905 // function call, because the evaluation order of function arguments is
15906 // unspecified in C++. This is important when testing to ensure the
15907 // arguments are emitted in the same order every time. E.g.:
15908 // Instead of:
15909 // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
15910 // EmitScalarExpr(E->getArg(1)), "swdiv");
15911 // Use:
15912 // Value *Op0 = EmitScalarExpr(E->getArg(0));
15913 // Value *Op1 = EmitScalarExpr(E->getArg(1));
15914 // return Builder.CreateFDiv(Op0, Op1, "swdiv");
15915
15916 Intrinsic::ID ID = Intrinsic::not_intrinsic;
15917
15918 switch (BuiltinID) {
15919 default: return nullptr;
15920
15921 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
15922 // call __builtin_readcyclecounter.
15923 case PPC::BI__builtin_ppc_get_timebase:
15924 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
15925
15926 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
15927 case PPC::BI__builtin_altivec_lvx:
15928 case PPC::BI__builtin_altivec_lvxl:
15929 case PPC::BI__builtin_altivec_lvebx:
15930 case PPC::BI__builtin_altivec_lvehx:
15931 case PPC::BI__builtin_altivec_lvewx:
15932 case PPC::BI__builtin_altivec_lvsl:
15933 case PPC::BI__builtin_altivec_lvsr:
15934 case PPC::BI__builtin_vsx_lxvd2x:
15935 case PPC::BI__builtin_vsx_lxvw4x:
15936 case PPC::BI__builtin_vsx_lxvd2x_be:
15937 case PPC::BI__builtin_vsx_lxvw4x_be:
15938 case PPC::BI__builtin_vsx_lxvl:
15939 case PPC::BI__builtin_vsx_lxvll:
15940 {
15941 SmallVector<Value *, 2> Ops;
15942 Ops.push_back(EmitScalarExpr(E->getArg(0)));
15943 Ops.push_back(EmitScalarExpr(E->getArg(1)));
15944 if (BuiltinID == PPC::BI__builtin_vsx_lxvl ||
15945 BuiltinID == PPC::BI__builtin_vsx_lxvll) {
15946 Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
15947 } else {
15948 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
15949 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
15950 Ops.pop_back();
15951 }
15952
15953 switch (BuiltinID) {
15954 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
15955 case PPC::BI__builtin_altivec_lvx:
15956 ID = Intrinsic::ppc_altivec_lvx;
15957 break;
15958 case PPC::BI__builtin_altivec_lvxl:
15959 ID = Intrinsic::ppc_altivec_lvxl;
15960 break;
15961 case PPC::BI__builtin_altivec_lvebx:
15962 ID = Intrinsic::ppc_altivec_lvebx;
15963 break;
15964 case PPC::BI__builtin_altivec_lvehx:
15965 ID = Intrinsic::ppc_altivec_lvehx;
15966 break;
15967 case PPC::BI__builtin_altivec_lvewx:
15968 ID = Intrinsic::ppc_altivec_lvewx;
15969 break;
15970 case PPC::BI__builtin_altivec_lvsl:
15971 ID = Intrinsic::ppc_altivec_lvsl;
15972 break;
15973 case PPC::BI__builtin_altivec_lvsr:
15974 ID = Intrinsic::ppc_altivec_lvsr;
15975 break;
15976 case PPC::BI__builtin_vsx_lxvd2x:
15977 ID = Intrinsic::ppc_vsx_lxvd2x;
15978 break;
15979 case PPC::BI__builtin_vsx_lxvw4x:
15980 ID = Intrinsic::ppc_vsx_lxvw4x;
15981 break;
15982 case PPC::BI__builtin_vsx_lxvd2x_be:
15983 ID = Intrinsic::ppc_vsx_lxvd2x_be;
15984 break;
15985 case PPC::BI__builtin_vsx_lxvw4x_be:
15986 ID = Intrinsic::ppc_vsx_lxvw4x_be;
15987 break;
15988 case PPC::BI__builtin_vsx_lxvl:
15989 ID = Intrinsic::ppc_vsx_lxvl;
15990 break;
15991 case PPC::BI__builtin_vsx_lxvll:
15992 ID = Intrinsic::ppc_vsx_lxvll;
15993 break;
15994 }
15995 llvm::Function *F = CGM.getIntrinsic(ID);
15996 return Builder.CreateCall(F, Ops, "");
15997 }
15998
15999 // vec_st, vec_xst_be
16000 case PPC::BI__builtin_altivec_stvx:
16001 case PPC::BI__builtin_altivec_stvxl:
16002 case PPC::BI__builtin_altivec_stvebx:
16003 case PPC::BI__builtin_altivec_stvehx:
16004 case PPC::BI__builtin_altivec_stvewx:
16005 case PPC::BI__builtin_vsx_stxvd2x:
16006 case PPC::BI__builtin_vsx_stxvw4x:
16007 case PPC::BI__builtin_vsx_stxvd2x_be:
16008 case PPC::BI__builtin_vsx_stxvw4x_be:
16009 case PPC::BI__builtin_vsx_stxvl:
16010 case PPC::BI__builtin_vsx_stxvll:
16011 {
16012 SmallVector<Value *, 3> Ops;
16013 Ops.push_back(EmitScalarExpr(E->getArg(0)));
16014 Ops.push_back(EmitScalarExpr(E->getArg(1)));
16015 Ops.push_back(EmitScalarExpr(E->getArg(2)));
16016 if (BuiltinID == PPC::BI__builtin_vsx_stxvl ||
16017 BuiltinID == PPC::BI__builtin_vsx_stxvll) {
16018 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
16019 } else {
16020 Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
16021 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
16022 Ops.pop_back();
16023 }
16024
16025 switch (BuiltinID) {
16026 default: llvm_unreachable("Unsupported st intrinsic!");
16027 case PPC::BI__builtin_altivec_stvx:
16028 ID = Intrinsic::ppc_altivec_stvx;
16029 break;
16030 case PPC::BI__builtin_altivec_stvxl:
16031 ID = Intrinsic::ppc_altivec_stvxl;
16032 break;
16033 case PPC::BI__builtin_altivec_stvebx:
16034 ID = Intrinsic::ppc_altivec_stvebx;
16035 break;
16036 case PPC::BI__builtin_altivec_stvehx:
16037 ID = Intrinsic::ppc_altivec_stvehx;
16038 break;
16039 case PPC::BI__builtin_altivec_stvewx:
16040 ID = Intrinsic::ppc_altivec_stvewx;
16041 break;
16042 case PPC::BI__builtin_vsx_stxvd2x:
16043 ID = Intrinsic::ppc_vsx_stxvd2x;
16044 break;
16045 case PPC::BI__builtin_vsx_stxvw4x:
16046 ID = Intrinsic::ppc_vsx_stxvw4x;
16047 break;
16048 case PPC::BI__builtin_vsx_stxvd2x_be:
16049 ID = Intrinsic::ppc_vsx_stxvd2x_be;
16050 break;
16051 case PPC::BI__builtin_vsx_stxvw4x_be:
16052 ID = Intrinsic::ppc_vsx_stxvw4x_be;
16053 break;
16054 case PPC::BI__builtin_vsx_stxvl:
16055 ID = Intrinsic::ppc_vsx_stxvl;
16056 break;
16057 case PPC::BI__builtin_vsx_stxvll:
16058 ID = Intrinsic::ppc_vsx_stxvll;
16059 break;
16060 }
16061 llvm::Function *F = CGM.getIntrinsic(ID);
16062 return Builder.CreateCall(F, Ops, "");
16063 }
16064 case PPC::BI__builtin_vsx_ldrmb: {
16065 // This essentially boils down to performing an unaligned VMX load
16066 // sequence that avoids crossing a page boundary, and then shuffling the
16067 // elements into the right side of the vector register.
16068 Value *Op0 = EmitScalarExpr(E->getArg(0));
16069 Value *Op1 = EmitScalarExpr(E->getArg(1));
16070 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
16071 llvm::Type *ResTy = ConvertType(E->getType());
16072 bool IsLE = getTarget().isLittleEndian();
16073
16074 // If the user wants the entire vector, just load the entire vector.
16075 if (NumBytes == 16) {
16076 Value *LD =
16077 Builder.CreateLoad(Address(Op0, ResTy, CharUnits::fromQuantity(1)));
16078 if (!IsLE)
16079 return LD;
16080
16081 // Reverse the bytes on LE.
16082 SmallVector<int, 16> RevMask;
16083 for (int Idx = 0; Idx < 16; Idx++)
16084 RevMask.push_back(15 - Idx);
16085 return Builder.CreateShuffleVector(LD, LD, RevMask);
16086 }
16087
16088 llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
16089 llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
16090 : Intrinsic::ppc_altivec_lvsl);
16091 llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
16092 Value *HiMem = Builder.CreateGEP(
16093 Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1));
16094 Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo");
16095 Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
16096 Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1");
16097
16098 Op0 = IsLE ? HiLd : LoLd;
16099 Op1 = IsLE ? LoLd : HiLd;
16100 Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1");
16101 Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());
16102
16103 if (IsLE) {
16104 SmallVector<int, 16> Consts;
16105 for (int Idx = 0; Idx < 16; Idx++) {
16106 int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
16107 : 16 - (NumBytes - Idx);
16108 Consts.push_back(Val);
16109 }
16110 return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
16111 Zero, Consts);
16112 }
16113 SmallVector<Constant *, 16> Consts;
16114 for (int Idx = 0; Idx < 16; Idx++)
16115 Consts.push_back(Builder.getInt8(NumBytes + Idx));
16116 Value *Mask2 = ConstantVector::get(Consts);
16117 return Builder.CreateBitCast(
16118 Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
16119 }
16120 case PPC::BI__builtin_vsx_strmb: {
16121 Value *Op0 = EmitScalarExpr(E->getArg(0));
16122 Value *Op1 = EmitScalarExpr(E->getArg(1));
16123 Value *Op2 = EmitScalarExpr(E->getArg(2));
16124 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
16125 bool IsLE = getTarget().isLittleEndian();
16126 auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
16127 // When storing the whole vector, store it directly on BE; on LE,
16128 // reverse the bytes first and then store.
16129 if (Width == 16) {
16130 Value *StVec = Op2;
16131 if (IsLE) {
16132 SmallVector<int, 16> RevMask;
16133 for (int Idx = 0; Idx < 16; Idx++)
16134 RevMask.push_back(15 - Idx);
16135 StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask);
16136 }
16137 return Builder.CreateStore(
16138 StVec, Address(Op0, Op2->getType(), CharUnits::fromQuantity(1)));
16139 }
16140 auto *ConvTy = Int64Ty;
16141 unsigned NumElts = 0;
16142 switch (Width) {
16143 default:
16144 llvm_unreachable("width for stores must be a power of 2");
16145 case 8:
16146 ConvTy = Int64Ty;
16147 NumElts = 2;
16148 break;
16149 case 4:
16150 ConvTy = Int32Ty;
16151 NumElts = 4;
16152 break;
16153 case 2:
16154 ConvTy = Int16Ty;
16155 NumElts = 8;
16156 break;
16157 case 1:
16158 ConvTy = Int8Ty;
16159 NumElts = 16;
16160 break;
16161 }
16162 Value *Vec = Builder.CreateBitCast(
16163 Op2, llvm::FixedVectorType::get(ConvTy, NumElts));
16164 Value *Ptr =
16165 Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset));
16166 Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
16167 if (IsLE && Width > 1) {
16168 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
16169 Elt = Builder.CreateCall(F, Elt);
16170 }
16171 return Builder.CreateStore(
16172 Elt, Address(Ptr, ConvTy, CharUnits::fromQuantity(1)));
16173 };
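// Decompose the requested byte count into power-of-two sub-stores; e.g.
// NumBytes == 7 becomes a 4-byte store at offset 3, a 2-byte store at
// offset 1, and a 1-byte store at offset 0.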
16174 unsigned Stored = 0;
16175 unsigned RemainingBytes = NumBytes;
16176 Value *Result;
16177 if (NumBytes == 16)
16178 return StoreSubVec(16, 0, 0);
16179 if (NumBytes >= 8) {
16180 Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
16181 RemainingBytes -= 8;
16182 Stored += 8;
16183 }
16184 if (RemainingBytes >= 4) {
16185 Result = StoreSubVec(4, NumBytes - Stored - 4,
16186 IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
16187 RemainingBytes -= 4;
16188 Stored += 4;
16189 }
16190 if (RemainingBytes >= 2) {
16191 Result = StoreSubVec(2, NumBytes - Stored - 2,
16192 IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
16193 RemainingBytes -= 2;
16194 Stored += 2;
16195 }
16196 if (RemainingBytes)
16197 Result =
16198 StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
16199 return Result;
16200 }
16201 // Square root
16202 case PPC::BI__builtin_vsx_xvsqrtsp:
16203 case PPC::BI__builtin_vsx_xvsqrtdp: {
16204 llvm::Type *ResultType = ConvertType(E->getType());
16205 Value *X = EmitScalarExpr(E->getArg(0));
16206 if (Builder.getIsFPConstrained()) {
16207 llvm::Function *F = CGM.getIntrinsic(
16208 Intrinsic::experimental_constrained_sqrt, ResultType);
16209 return Builder.CreateConstrainedFPCall(F, X);
16210 } else {
16211 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
16212 return Builder.CreateCall(F, X);
16213 }
16214 }
16215 // Count leading zeros
16216 case PPC::BI__builtin_altivec_vclzb:
16217 case PPC::BI__builtin_altivec_vclzh:
16218 case PPC::BI__builtin_altivec_vclzw:
16219 case PPC::BI__builtin_altivec_vclzd: {
16220 llvm::Type *ResultType = ConvertType(E->getType());
16221 Value *X = EmitScalarExpr(E->getArg(0));
16222 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
16223 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
16224 return Builder.CreateCall(F, {X, Undef});
16225 }
16226 case PPC::BI__builtin_altivec_vctzb:
16227 case PPC::BI__builtin_altivec_vctzh:
16228 case PPC::BI__builtin_altivec_vctzw:
16229 case PPC::BI__builtin_altivec_vctzd: {
16230 llvm::Type *ResultType = ConvertType(E->getType());
16231 Value *X = EmitScalarExpr(E->getArg(0));
16232 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
16233 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
16234 return Builder.CreateCall(F, {X, Undef});
16235 }
16236 case PPC::BI__builtin_altivec_vinsd:
16237 case PPC::BI__builtin_altivec_vinsw:
16238 case PPC::BI__builtin_altivec_vinsd_elt:
16239 case PPC::BI__builtin_altivec_vinsw_elt: {
16240 llvm::Type *ResultType = ConvertType(E->getType());
16241 Value *Op0 = EmitScalarExpr(E->getArg(0));
16242 Value *Op1 = EmitScalarExpr(E->getArg(1));
16243 Value *Op2 = EmitScalarExpr(E->getArg(2));
16244
16245 bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
16246 BuiltinID == PPC::BI__builtin_altivec_vinsd);
16247
16248 bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
16249 BuiltinID == PPC::BI__builtin_altivec_vinsw_elt);
16250
16251 // The third argument must be a compile-time constant.
16252 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
16253 assert(ArgCI &&
16254 "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
16255
16256 // The valid range for the third argument depends on the input type and
16257 // the builtin being called.
16258 int ValidMaxValue = 0;
16259 if (IsUnaligned)
16260 ValidMaxValue = (Is32bit) ? 12 : 8;
16261 else
16262 ValidMaxValue = (Is32bit) ? 3 : 1;
16263
16264 // Get value of third argument.
16265 int64_t ConstArg = ArgCI->getSExtValue();
16266
16267 // Compose range checking error message.
16268 std::string RangeErrMsg = IsUnaligned ? "byte" : "element";
16269 RangeErrMsg += " number " + llvm::to_string(ConstArg);
16270 RangeErrMsg += " is outside of the valid range [0, ";
16271 RangeErrMsg += llvm::to_string(ValidMaxValue) + "]";
16272
16273 // Issue error if third argument is not within the valid range.
16274 if (ConstArg < 0 || ConstArg > ValidMaxValue)
16275 CGM.Error(E->getExprLoc(), RangeErrMsg);
16276
16277 // Input to vec_replace_elt is an element index, convert to byte index.
16278 if (!IsUnaligned) {
16279 ConstArg *= Is32bit ? 4 : 8;
16280 // Fix the constant according to endianness.
16281 if (getTarget().isLittleEndian())
16282 ConstArg = (Is32bit ? 12 : 8) - ConstArg;
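// E.g. on little-endian, 32-bit element 0 becomes byte offset 12.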
16283 }
16284
16285 ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;
16286 Op2 = ConstantInt::getSigned(Int32Ty, ConstArg);
16287 // Casting input to vector int as per intrinsic definition.
16288 Op0 =
16289 Is32bit
16290 ? Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4))
16291 : Builder.CreateBitCast(Op0,
16292 llvm::FixedVectorType::get(Int64Ty, 2));
16293 return Builder.CreateBitCast(
16294 Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);
16295 }
16296 case PPC::BI__builtin_altivec_vpopcntb:
16297 case PPC::BI__builtin_altivec_vpopcnth:
16298 case PPC::BI__builtin_altivec_vpopcntw:
16299 case PPC::BI__builtin_altivec_vpopcntd: {
16300 llvm::Type *ResultType = ConvertType(E->getType());
16301 Value *X = EmitScalarExpr(E->getArg(0));
16302 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
16303 return Builder.CreateCall(F, X);
16304 }
16305 case PPC::BI__builtin_altivec_vadduqm:
16306 case PPC::BI__builtin_altivec_vsubuqm: {
16307 Value *Op0 = EmitScalarExpr(E->getArg(0));
16308 Value *Op1 = EmitScalarExpr(E->getArg(1));
16309 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
16310 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));
16311 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));
16312 if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
16313 return Builder.CreateAdd(Op0, Op1, "vadduqm");
16314 else
16315 return Builder.CreateSub(Op0, Op1, "vsubuqm");
16316 }
16317 case PPC::BI__builtin_altivec_vaddcuq_c:
16318 case PPC::BI__builtin_altivec_vsubcuq_c: {
16319 SmallVector<Value *, 2> Ops;
16320 Value *Op0 = EmitScalarExpr(E->getArg(0));
16321 Value *Op1 = EmitScalarExpr(E->getArg(1));
16322 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
16323 llvm::IntegerType::get(getLLVMContext(), 128), 1);
16324 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
16325 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
16326 ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c)
16327 ? Intrinsic::ppc_altivec_vaddcuq
16328 : Intrinsic::ppc_altivec_vsubcuq;
16329 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
16330 }
16331 case PPC::BI__builtin_altivec_vaddeuqm_c:
16332 case PPC::BI__builtin_altivec_vaddecuq_c:
16333 case PPC::BI__builtin_altivec_vsubeuqm_c:
16334 case PPC::BI__builtin_altivec_vsubecuq_c: {
16335 SmallVector<Value *, 3> Ops;
16336 Value *Op0 = EmitScalarExpr(E->getArg(0));
16337 Value *Op1 = EmitScalarExpr(E->getArg(1));
16338 Value *Op2 = EmitScalarExpr(E->getArg(2));
16339 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
16340 llvm::IntegerType::get(getLLVMContext(), 128), 1);
16341 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
16342 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
16343 Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty));
16344 switch (BuiltinID) {
16345 default:
16346 llvm_unreachable("Unsupported intrinsic!");
16347 case PPC::BI__builtin_altivec_vaddeuqm_c:
16348 ID = Intrinsic::ppc_altivec_vaddeuqm;
16349 break;
16350 case PPC::BI__builtin_altivec_vaddecuq_c:
16351 ID = Intrinsic::ppc_altivec_vaddecuq;
16352 break;
16353 case PPC::BI__builtin_altivec_vsubeuqm_c:
16354 ID = Intrinsic::ppc_altivec_vsubeuqm;
16355 break;
16356 case PPC::BI__builtin_altivec_vsubecuq_c:
16357 ID = Intrinsic::ppc_altivec_vsubecuq;
16358 break;
16359 }
16360 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
16361 }
16362 // Rotate and insert under mask operation.
16363 // __rldimi(rs, is, shift, mask)
16364 // (rotl64(rs, shift) & mask) | (is & ~mask)
16365 // __rlwimi(rs, is, shift, mask)
16366 // (rotl(rs, shift) & mask) | (is & ~mask)
16367 case PPC::BI__builtin_ppc_rldimi:
16368 case PPC::BI__builtin_ppc_rlwimi: {
16369 Value *Op0 = EmitScalarExpr(E->getArg(0));
16370 Value *Op1 = EmitScalarExpr(E->getArg(1));
16371 Value *Op2 = EmitScalarExpr(E->getArg(2));
16372 Value *Op3 = EmitScalarExpr(E->getArg(3));
16373 llvm::Type *Ty = Op0->getType();
16374 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
16375 if (BuiltinID == PPC::BI__builtin_ppc_rldimi)
16376 Op2 = Builder.CreateZExt(Op2, Int64Ty);
16377 Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
16378 Value *X = Builder.CreateAnd(Shift, Op3);
16379 Value *Y = Builder.CreateAnd(Op1, Builder.CreateNot(Op3));
16380 return Builder.CreateOr(X, Y);
16381 }
16382 // Rotate and insert under mask operation.
16383 // __rlwnm(rs, shift, mask)
16384 // rotl(rs, shift) & mask
16385 case PPC::BI__builtin_ppc_rlwnm: {
16386 Value *Op0 = EmitScalarExpr(E->getArg(0));
16387 Value *Op1 = EmitScalarExpr(E->getArg(1));
16388 Value *Op2 = EmitScalarExpr(E->getArg(2));
16389 llvm::Type *Ty = Op0->getType();
16390 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
16391 Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op1});
16392 return Builder.CreateAnd(Shift, Op2);
16393 }
16394 case PPC::BI__builtin_ppc_poppar4:
16395 case PPC::BI__builtin_ppc_poppar8: {
16396 Value *Op0 = EmitScalarExpr(E->getArg(0));
16397 llvm::Type *ArgType = Op0->getType();
16398 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
16399 Value *Tmp = Builder.CreateCall(F, Op0);
16400
16401 llvm::Type *ResultType = ConvertType(E->getType());
16402 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
16403 if (Result->getType() != ResultType)
16404 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
16405 "cast");
16406 return Result;
16407 }
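// Illustration (sketch, not in the original source): poppar computes the
// parity of the population count, e.g. for poppar8:
//   %cnt = call i64 @llvm.ctpop.i64(i64 %x)
//   %res = and i64 %cnt, 1   ; 1 iff x has an odd number of set bits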
16408 case PPC::BI__builtin_ppc_cmpb: {
16409 Value *Op0 = EmitScalarExpr(E->getArg(0));
16410 Value *Op1 = EmitScalarExpr(E->getArg(1));
16411 if (getTarget().getTriple().isPPC64()) {
16412 Function *F =
16413 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
16414 return Builder.CreateCall(F, {Op0, Op1}, "cmpb");
16415 }
16416 // For 32 bit, emit the code as below:
16417 // %conv = trunc i64 %a to i32
16418 // %conv1 = trunc i64 %b to i32
16419 // %shr = lshr i64 %a, 32
16420 // %conv2 = trunc i64 %shr to i32
16421 // %shr3 = lshr i64 %b, 32
16422 // %conv4 = trunc i64 %shr3 to i32
16423 // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
16424 // %conv5 = zext i32 %0 to i64
16425 // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
16426 // %conv614 = zext i32 %1 to i64
16427 // %shl = shl nuw i64 %conv614, 32
16428 // %or = or i64 %shl, %conv5
16429 // ret i64 %or
16430 Function *F =
16431 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
16432 Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty);
16433 Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty);
16434 Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
16435 Value *ArgOneHi =
16436 Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty);
16437 Value *ArgTwoHi =
16438 Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty);
16439 Value *ResLo = Builder.CreateZExt(
16440 Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
16441 Value *ResHiShift = Builder.CreateZExt(
16442 Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);
16443 Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);
16444 return Builder.CreateOr(ResLo, ResHi);
16445 }
16446 // Copy sign
16447 case PPC::BI__builtin_vsx_xvcpsgnsp:
16448 case PPC::BI__builtin_vsx_xvcpsgndp: {
16449 llvm::Type *ResultType = ConvertType(E->getType());
16450 Value *X = EmitScalarExpr(E->getArg(0));
16451 Value *Y = EmitScalarExpr(E->getArg(1));
16452 ID = Intrinsic::copysign;
16453 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
16454 return Builder.CreateCall(F, {X, Y});
16455 }
16456 // Rounding/truncation
16457 case PPC::BI__builtin_vsx_xvrspip:
16458 case PPC::BI__builtin_vsx_xvrdpip:
16459 case PPC::BI__builtin_vsx_xvrdpim:
16460 case PPC::BI__builtin_vsx_xvrspim:
16461 case PPC::BI__builtin_vsx_xvrdpi:
16462 case PPC::BI__builtin_vsx_xvrspi:
16463 case PPC::BI__builtin_vsx_xvrdpic:
16464 case PPC::BI__builtin_vsx_xvrspic:
16465 case PPC::BI__builtin_vsx_xvrdpiz:
16466 case PPC::BI__builtin_vsx_xvrspiz: {
16467 llvm::Type *ResultType = ConvertType(E->getType());
16468 Value *X = EmitScalarExpr(E->getArg(0));
16469 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
16470 BuiltinID == PPC::BI__builtin_vsx_xvrspim)
16471 ID = Builder.getIsFPConstrained()
16472 ? Intrinsic::experimental_constrained_floor
16473 : Intrinsic::floor;
16474 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
16475 BuiltinID == PPC::BI__builtin_vsx_xvrspi)
16476 ID = Builder.getIsFPConstrained()
16477 ? Intrinsic::experimental_constrained_round
16478 : Intrinsic::round;
16479 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
16480 BuiltinID == PPC::BI__builtin_vsx_xvrspic)
16481 ID = Builder.getIsFPConstrained()
16482 ? Intrinsic::experimental_constrained_rint
16483 : Intrinsic::rint;
16484 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
16485 BuiltinID == PPC::BI__builtin_vsx_xvrspip)
16486 ID = Builder.getIsFPConstrained()
16487 ? Intrinsic::experimental_constrained_ceil
16488 : Intrinsic::ceil;
16489 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
16490 BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
16491 ID = Builder.getIsFPConstrained()
16492 ? Intrinsic::experimental_constrained_trunc
16493 : Intrinsic::trunc;
16494 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
16495 return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X)
16496 : Builder.CreateCall(F, X);
16497 }
16498
16499 // Absolute value
16500 case PPC::BI__builtin_vsx_xvabsdp:
16501 case PPC::BI__builtin_vsx_xvabssp: {
16502 llvm::Type *ResultType = ConvertType(E->getType());
16503 Value *X = EmitScalarExpr(E->getArg(0));
16504 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
16505 return Builder.CreateCall(F, X);
16506 }
16507
16508 // Fastmath by default
16509 case PPC::BI__builtin_ppc_recipdivf:
16510 case PPC::BI__builtin_ppc_recipdivd:
16511 case PPC::BI__builtin_ppc_rsqrtf:
16512 case PPC::BI__builtin_ppc_rsqrtd: {
16513 FastMathFlags FMF = Builder.getFastMathFlags();
16514 Builder.getFastMathFlags().setFast();
16515 llvm::Type *ResultType = ConvertType(E->getType());
16516 Value *X = EmitScalarExpr(E->getArg(0));
16517
16518 if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
16519 BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
16520 Value *Y = EmitScalarExpr(E->getArg(1));
16521 Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv");
16522 Builder.getFastMathFlags() &= (FMF);
16523 return FDiv;
16524 }
16525 auto *One = ConstantFP::get(ResultType, 1.0);
16526 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
16527 Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
16528 Builder.getFastMathFlags() &= (FMF);
16529 return FDiv;
16530 }
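// Note on the flag handling above (sketch, not in the original source):
// setFast() turns on all fast-math flags so the fdiv is emitted as e.g.
//   %recipdiv = fdiv fast float %x, %y
// and the subsequent &= with the saved copy clears exactly the bits that
// setFast() enabled, restoring the builder's previous flags.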
16531 case PPC::BI__builtin_ppc_alignx: {
16532 Value *Op0 = EmitScalarExpr(E->getArg(0));
16533 Value *Op1 = EmitScalarExpr(E->getArg(1));
16534 ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);
16535 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
16536 AlignmentCI = ConstantInt::get(AlignmentCI->getType(),
16537 llvm::Value::MaximumAlignment);
16538
16539 emitAlignmentAssumption(Op1, E->getArg(1),
16540 /*The expr loc is sufficient.*/ SourceLocation(),
16541 AlignmentCI, nullptr);
16542 return Op1;
16543 }
16544 case PPC::BI__builtin_ppc_rdlam: {
16545 Value *Op0 = EmitScalarExpr(E->getArg(0));
16546 Value *Op1 = EmitScalarExpr(E->getArg(1));
16547 Value *Op2 = EmitScalarExpr(E->getArg(2));
16548 llvm::Type *Ty = Op0->getType();
16549 Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false);
16550 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
16551 Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt});
16552 return Builder.CreateAnd(Rotate, Op2);
16553 }
16554 case PPC::BI__builtin_ppc_load2r: {
16555 Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
16556 Value *Op0 = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
16557 Value *LoadIntrinsic = Builder.CreateCall(F, {Op0});
16558 return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
16559 }
16560 // FMA variations
16561 case PPC::BI__builtin_ppc_fnmsub:
16562 case PPC::BI__builtin_ppc_fnmsubs:
16563 case PPC::BI__builtin_vsx_xvmaddadp:
16564 case PPC::BI__builtin_vsx_xvmaddasp:
16565 case PPC::BI__builtin_vsx_xvnmaddadp:
16566 case PPC::BI__builtin_vsx_xvnmaddasp:
16567 case PPC::BI__builtin_vsx_xvmsubadp:
16568 case PPC::BI__builtin_vsx_xvmsubasp:
16569 case PPC::BI__builtin_vsx_xvnmsubadp:
16570 case PPC::BI__builtin_vsx_xvnmsubasp: {
16571 llvm::Type *ResultType = ConvertType(E->getType());
16572 Value *X = EmitScalarExpr(E->getArg(0));
16573 Value *Y = EmitScalarExpr(E->getArg(1));
16574 Value *Z = EmitScalarExpr(E->getArg(2));
16575 llvm::Function *F;
16576 if (Builder.getIsFPConstrained())
16577 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
16578 else
16579 F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
16580 switch (BuiltinID) {
16581 case PPC::BI__builtin_vsx_xvmaddadp:
16582 case PPC::BI__builtin_vsx_xvmaddasp:
16583 if (Builder.getIsFPConstrained())
16584 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
16585 else
16586 return Builder.CreateCall(F, {X, Y, Z});
16587 case PPC::BI__builtin_vsx_xvnmaddadp:
16588 case PPC::BI__builtin_vsx_xvnmaddasp:
16589 if (Builder.getIsFPConstrained())
16590 return Builder.CreateFNeg(
16591 Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
16592 else
16593 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
16594 case PPC::BI__builtin_vsx_xvmsubadp:
16595 case PPC::BI__builtin_vsx_xvmsubasp:
16596 if (Builder.getIsFPConstrained())
16597 return Builder.CreateConstrainedFPCall(
16598 F, {X, Y, Builder.CreateFNeg(Z, "neg")});
16599 else
16600 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
16601 case PPC::BI__builtin_ppc_fnmsub:
16602 case PPC::BI__builtin_ppc_fnmsubs:
16603 case PPC::BI__builtin_vsx_xvnmsubadp:
16604 case PPC::BI__builtin_vsx_xvnmsubasp:
16605 if (Builder.getIsFPConstrained())
16606 return Builder.CreateFNeg(
16607 Builder.CreateConstrainedFPCall(
16608 F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
16609 "neg");
16610 else
16611 return Builder.CreateCall(
16612 CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});
16613 }
16614 llvm_unreachable("Unknown FMA operation");
16615 return nullptr; // Suppress no-return warning
16616 }
16617
16618 case PPC::BI__builtin_vsx_insertword: {
16619 Value *Op0 = EmitScalarExpr(E->getArg(0));
16620 Value *Op1 = EmitScalarExpr(E->getArg(1));
16621 Value *Op2 = EmitScalarExpr(E->getArg(2));
16622 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
16623
16624 // Third argument is a compile time constant int. It must be clamped
16625 // to the range [0, 12].
16626 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
16627 assert(ArgCI &&
16628 "Third arg to xxinsertw intrinsic must be constant integer");
16629 const int64_t MaxIndex = 12;
16630 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
16631
16632 // The builtin semantics don't exactly match the xxinsertw instruction's
16633 // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
16634 // word from the first argument, and inserts it in the second argument. The
16635 // instruction extracts the word from its second input register and inserts
16636 // it into its first input register, so swap the first and second arguments.
16637 std::swap(Op0, Op1);
16638
16639 // Need to cast the second argument from a vector of unsigned int to a
16640 // vector of long long.
16641 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
16642
16643 if (getTarget().isLittleEndian()) {
16644 // Reverse the double words in the vector we will extract from.
16645 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
16646 Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0});
16647
16648 // Reverse the index.
16649 Index = MaxIndex - Index;
16650 }
16651
16652 // Intrinsic expects the first arg to be a vector of int.
16653 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
16654 Op2 = ConstantInt::getSigned(Int32Ty, Index);
16655 return Builder.CreateCall(F, {Op0, Op1, Op2});
16656 }
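// Worked example (not in the original source; little-endian): for
// __builtin_vsx_insertword(v, w, 4) the operands are swapped, the
// doublewords of the extract source are reversed, and the byte index
// becomes 12 - 4 = 8 before calling @llvm.ppc.vsx.xxinsertw.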
16657
16658 case PPC::BI__builtin_vsx_extractuword: {
16659 Value *Op0 = EmitScalarExpr(E->getArg(0));
16660 Value *Op1 = EmitScalarExpr(E->getArg(1));
16661 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
16662
16663 // Intrinsic expects the first argument to be a vector of doublewords.
16664 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
16665
16666 // The second argument is a compile time constant int that needs to
16667 // be clamped to the range [0, 12].
16668 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1);
16669 assert(ArgCI &&
16670 "Second Arg to xxextractuw intrinsic must be a constant integer!");
16671 const int64_t MaxIndex = 12;
16672 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
16673
16674 if (getTarget().isLittleEndian()) {
16675 // Reverse the index.
16676 Index = MaxIndex - Index;
16677 Op1 = ConstantInt::getSigned(Int32Ty, Index);
16678
16679 // Emit the call, then reverse the double words of the results vector.
16680 Value *Call = Builder.CreateCall(F, {Op0, Op1});
16681
16682 Value *ShuffleCall =
16683 Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
16684 return ShuffleCall;
16685 } else {
16686 Op1 = ConstantInt::getSigned(Int32Ty, Index);
16687 return Builder.CreateCall(F, {Op0, Op1});
16688 }
16689 }
16690
16691 case PPC::BI__builtin_vsx_xxpermdi: {
16692 Value *Op0 = EmitScalarExpr(E->getArg(0));
16693 Value *Op1 = EmitScalarExpr(E->getArg(1));
16694 Value *Op2 = EmitScalarExpr(E->getArg(2));
16695 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
16696 assert(ArgCI && "Third arg must be constant integer!");
16697
16698 unsigned Index = ArgCI->getZExtValue();
16699 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
16700 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
16701
16702 // Account for endianness by treating this as just a shuffle. So we use the
16703 // same indices for both LE and BE in order to produce expected results in
16704 // both cases.
16705 int ElemIdx0 = (Index & 2) >> 1;
16706 int ElemIdx1 = 2 + (Index & 1);
16707
16708 int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
16709 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
16710 QualType BIRetType = E->getType();
16711 auto RetTy = ConvertType(BIRetType);
16712 return Builder.CreateBitCast(ShuffleCall, RetTy);
16713 }
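// Worked example (not in the original source): only the low two bits of
// the index matter; __builtin_vsx_xxpermdi(a, b, 3) gives
// ElemIdx0 = (3 & 2) >> 1 = 1 and ElemIdx1 = 2 + (3 & 1) = 3, i.e.
//   shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>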
16714
16715 case PPC::BI__builtin_vsx_xxsldwi: {
16716 Value *Op0 = EmitScalarExpr(E->getArg(0));
16717 Value *Op1 = EmitScalarExpr(E->getArg(1));
16718 Value *Op2 = EmitScalarExpr(E->getArg(2));
16719 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
16720 assert(ArgCI && "Third argument must be a compile time constant");
16721 unsigned Index = ArgCI->getZExtValue() & 0x3;
16722 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
16723 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4));
16724
16725 // Create a shuffle mask
16726 int ElemIdx0;
16727 int ElemIdx1;
16728 int ElemIdx2;
16729 int ElemIdx3;
16730 if (getTarget().isLittleEndian()) {
16731 // Little endian element N comes from element 8+N-Index of the
16732 // concatenated wide vector (of course, using modulo arithmetic on
16733 // the total number of elements).
16734 ElemIdx0 = (8 - Index) % 8;
16735 ElemIdx1 = (9 - Index) % 8;
16736 ElemIdx2 = (10 - Index) % 8;
16737 ElemIdx3 = (11 - Index) % 8;
16738 } else {
16739 // Big endian ElemIdx<N> = Index + N
16740 ElemIdx0 = Index;
16741 ElemIdx1 = Index + 1;
16742 ElemIdx2 = Index + 2;
16743 ElemIdx3 = Index + 3;
16744 }
16745
16746 int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
16747 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
16748 QualType BIRetType = E->getType();
16749 auto RetTy = ConvertType(BIRetType);
16750 return Builder.CreateBitCast(ShuffleCall, RetTy);
16751 }
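// Worked example (not in the original source): for Index = 1 the mask is
// {1, 2, 3, 4} on big-endian and {7, 0, 1, 2} on little-endian; both
// select the same architectural words of the concatenated eight-word
// vector.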
16752
16753 case PPC::BI__builtin_pack_vector_int128: {
16754 Value *Op0 = EmitScalarExpr(E->getArg(0));
16755 Value *Op1 = EmitScalarExpr(E->getArg(1));
16756 bool isLittleEndian = getTarget().isLittleEndian();
16757 Value *PoisonValue =
16758 llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));
16759 Value *Res = Builder.CreateInsertElement(
16760 PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
16761 Res = Builder.CreateInsertElement(Res, Op1,
16762 (uint64_t)(isLittleEndian ? 0 : 1));
16763 return Builder.CreateBitCast(Res, ConvertType(E->getType()));
16764 }
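// Illustration (sketch, not in the original source; little-endian case):
//   %t0  = insertelement <2 x i64> poison, i64 %a, i64 1
//   %t1  = insertelement <2 x i64> %t0, i64 %b, i64 0
//   %res = bitcast <2 x i64> %t1 to <1 x i128>
// On big-endian the two insertion indices are swapped.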
16765
16766 case PPC::BI__builtin_unpack_vector_int128: {
16767 Value *Op0 = EmitScalarExpr(E->getArg(0));
16768 Value *Op1 = EmitScalarExpr(E->getArg(1));
16769 ConstantInt *Index = cast<ConstantInt>(Op1);
16770 Value *Unpacked = Builder.CreateBitCast(
16771 Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
16772
16773 if (getTarget().isLittleEndian())
16774 Index = ConstantInt::get(Index->getType(), 1 - Index->getZExtValue());
16775
16776 return Builder.CreateExtractElement(Unpacked, Index);
16777 }
16778
16779 case PPC::BI__builtin_ppc_sthcx: {
16780 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
16781 Value *Op0 = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
16782 Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty);
16783 return Builder.CreateCall(F, {Op0, Op1});
16784 }
16785
16786 // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
16787 // Some of the MMA instructions accumulate their result into an existing
16788 // accumulator whereas the others generate a new accumulator. So we need to
16789 // use custom code generation to expand a builtin call with a pointer into a
16790 // load (if the corresponding instruction accumulates its result), followed by
16791 // the call to the intrinsic and a store of the result.
16792#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \
16793 case PPC::BI__builtin_##Name:
16794#include "clang/Basic/BuiltinsPPC.def"
16795 {
16796 SmallVector<Value *, 4> Ops;
16797 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
16798 if (E->getArg(i)->getType()->isArrayType())
16799 Ops.push_back(EmitArrayToPointerDecay(E->getArg(i)).getPointer());
16800 else
16801 Ops.push_back(EmitScalarExpr(E->getArg(i)));
16802 // The first argument of these builtins is a pointer used to store their
16803 // result. However, the LLVM intrinsics return their result in multiple
16804 // return values. So, here we emit code extracting these values from the
16805 // intrinsic results and storing them using that pointer.
16806 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
16807 BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
16808 BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
16809 unsigned NumVecs = 2;
16810 auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
16811 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
16812 NumVecs = 4;
16813 Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
16814 }
16815 llvm::Function *F = CGM.getIntrinsic(Intrinsic);
16816 Address Addr = EmitPointerWithAlignment(E->getArg(1));
16817 Value *Vec = Builder.CreateLoad(Addr);
16818 Value *Call = Builder.CreateCall(F, {Vec});
16819 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);
16820 Value *Ptr = Ops[0];
16821 for (unsigned i=0; i<NumVecs; i++) {
16822 Value *Vec = Builder.CreateExtractValue(Call, i);
16823 llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i);
16824 Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index);
16825 Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));
16826 }
16827 return Call;
16828 }
16829 if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
16830 BuiltinID == PPC::BI__builtin_mma_build_acc) {
16831 // Reverse the order of the operands for LE, so the
16832 // same builtin call can be used on both LE and BE
16833 // without the need for the programmer to swap operands.
16834 // The operands are reversed starting from the second argument;
16835 // the first operand is the pointer to the pair/accumulator
16836 // that is being built.
16837 if (getTarget().isLittleEndian())
16838 std::reverse(Ops.begin() + 1, Ops.end());
16839 }
16840 bool Accumulate;
16841 switch (BuiltinID) {
16842 #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \
16843 case PPC::BI__builtin_##Name: \
16844 ID = Intrinsic::ppc_##Intr; \
16845 Accumulate = Acc; \
16846 break;
16847 #include "clang/Basic/BuiltinsPPC.def"
16848 }
16849 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
16850 BuiltinID == PPC::BI__builtin_vsx_stxvp ||
16851 BuiltinID == PPC::BI__builtin_mma_lxvp ||
16852 BuiltinID == PPC::BI__builtin_mma_stxvp) {
16853 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
16854 BuiltinID == PPC::BI__builtin_mma_lxvp) {
16855 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
16856 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
16857 } else {
16858 Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
16859 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
16860 }
16861 Ops.pop_back();
16862 llvm::Function *F = CGM.getIntrinsic(ID);
16863 return Builder.CreateCall(F, Ops, "");
16864 }
16865 SmallVector<Value*, 4> CallOps;
16866 if (Accumulate) {
16867 Address Addr = EmitPointerWithAlignment(E->getArg(0));
16868 Value *Acc = Builder.CreateLoad(Addr);
16869 CallOps.push_back(Acc);
16870 }
16871 for (unsigned i=1; i<Ops.size(); i++)
16872 CallOps.push_back(Ops[i]);
16873 llvm::Function *F = CGM.getIntrinsic(ID);
16874 Value *Call = Builder.CreateCall(F, CallOps);
16875 return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign(64));
16876 }
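// Illustration (sketch, not in the original source; assumes the
// accumulating builtin __builtin_mma_xvf32gerpp): the load/call/store
// expansion looks roughly like:
//   %acc = load <512 x i1>, ptr %accp
//   %upd = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %acc,
//              <16 x i8> %a, <16 x i8> %b)
//   store <512 x i1> %upd, ptr %accp, align 64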
16877
16878 case PPC::BI__builtin_ppc_compare_and_swap:
16879 case PPC::BI__builtin_ppc_compare_and_swaplp: {
16880 Address Addr = EmitPointerWithAlignment(E->getArg(0));
16881 Address OldValAddr = EmitPointerWithAlignment(E->getArg(1));
16882 Value *OldVal = Builder.CreateLoad(OldValAddr);
16883 QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();
16884 LValue LV = MakeAddrLValue(Addr, AtomicTy);
16885 Value *Op2 = EmitScalarExpr(E->getArg(2));
16886 auto Pair = EmitAtomicCompareExchange(
16887 LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(),
16888 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
16889 // Unlike C11's atomic_compare_exchange, according to
16890 // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
16891 // > In either case, the contents of the memory location specified by addr
16892 // > are copied into the memory location specified by old_val_addr.
16893 // But it does not specify whether the store to OldValAddr is atomic or
16894 // which ordering to use. Following XL's codegen, treat it as a normal
16895 // store.
16896 Value *LoadedVal = Pair.first.getScalarVal();
16897 Builder.CreateStore(LoadedVal, OldValAddr);
16898 return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());
16899 }
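// Illustration (sketch, not in the original source; 32-bit variant):
//   %pair = cmpxchg weak ptr %addr, i32 %old, i32 %new monotonic monotonic
//   %val  = extractvalue { i32, i1 } %pair, 0
//   store i32 %val, ptr %old_val_addr
//   %ok   = extractvalue { i32, i1 } %pair, 1
//   %ret  = zext i1 %ok to i32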
16900 case PPC::BI__builtin_ppc_fetch_and_add:
16901 case PPC::BI__builtin_ppc_fetch_and_addlp: {
16902 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
16903 llvm::AtomicOrdering::Monotonic);
16904 }
16905 case PPC::BI__builtin_ppc_fetch_and_and:
16906 case PPC::BI__builtin_ppc_fetch_and_andlp: {
16907 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
16908 llvm::AtomicOrdering::Monotonic);
16909 }
16910
16911 case PPC::BI__builtin_ppc_fetch_and_or:
16912 case PPC::BI__builtin_ppc_fetch_and_orlp: {
16913 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
16914 llvm::AtomicOrdering::Monotonic);
16915 }
16916 case PPC::BI__builtin_ppc_fetch_and_swap:
16917 case PPC::BI__builtin_ppc_fetch_and_swaplp: {
16918 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
16919 llvm::AtomicOrdering::Monotonic);
16920 }
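// Illustration (not in the original source): each fetch_and_* builtin
// above becomes a single atomicrmw with monotonic ordering, e.g. for
// fetch_and_add:
//   %old = atomicrmw add ptr %addr, i32 %val monotonic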
16921 case PPC::BI__builtin_ppc_ldarx:
16922 case PPC::BI__builtin_ppc_lwarx:
16923 case PPC::BI__builtin_ppc_lharx:
16924 case PPC::BI__builtin_ppc_lbarx:
16925 return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E);
16926 case PPC::BI__builtin_ppc_mfspr: {
16927 Value *Op0 = EmitScalarExpr(E->getArg(0));
16928 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
16929 ? Int32Ty
16930 : Int64Ty;
16931 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
16932 return Builder.CreateCall(F, {Op0});
16933 }
16934 case PPC::BI__builtin_ppc_mtspr: {
16935 Value *Op0 = EmitScalarExpr(E->getArg(0));
16936 Value *Op1 = EmitScalarExpr(E->getArg(1));
16937 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
16938 ? Int32Ty
16939 : Int64Ty;
16940 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
16941 return Builder.CreateCall(F, {Op0, Op1});
16942 }
16943 case PPC::BI__builtin_ppc_popcntb: {
16944 Value *ArgValue = EmitScalarExpr(E->getArg(0));
16945 llvm::Type *ArgType = ArgValue->getType();
16946 Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
16947 return Builder.CreateCall(F, {ArgValue}, "popcntb");
16948 }
16949 case PPC::BI__builtin_ppc_mtfsf: {
16950 // The builtin takes a uint32 that needs to be cast to an
16951 // f64 to be passed to the intrinsic.
16952 Value *Op0 = EmitScalarExpr(E->getArg(0));
16953 Value *Op1 = EmitScalarExpr(E->getArg(1));
16954 Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy);
16955 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
16956 return Builder.CreateCall(F, {Op0, Cast}, "");
16957 }
16958
16959 case PPC::BI__builtin_ppc_swdiv_nochk:
16960 case PPC::BI__builtin_ppc_swdivs_nochk: {
16961 Value *Op0 = EmitScalarExpr(E->getArg(0));
16962 Value *Op1 = EmitScalarExpr(E->getArg(1));
16963 FastMathFlags FMF = Builder.getFastMathFlags();
16964 Builder.getFastMathFlags().setFast();
16965 Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk");
16966 Builder.getFastMathFlags() &= (FMF);
16967 return FDiv;
16968 }
16969 case PPC::BI__builtin_ppc_fric:
16970 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
16971 *this, E, Intrinsic::rint,
16972 Intrinsic::experimental_constrained_rint))
16973 .getScalarVal();
16974 case PPC::BI__builtin_ppc_frim:
16975 case PPC::BI__builtin_ppc_frims:
16976 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
16977 *this, E, Intrinsic::floor,
16978 Intrinsic::experimental_constrained_floor))
16979 .getScalarVal();
16980 case PPC::BI__builtin_ppc_frin:
16981 case PPC::BI__builtin_ppc_frins:
16982 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
16983 *this, E, Intrinsic::round,
16984 Intrinsic::experimental_constrained_round))
16985 .getScalarVal();
16986 case PPC::BI__builtin_ppc_frip:
16987 case PPC::BI__builtin_ppc_frips:
16988 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
16989 *this, E, Intrinsic::ceil,
16990 Intrinsic::experimental_constrained_ceil))
16991 .getScalarVal();
16992 case PPC::BI__builtin_ppc_friz:
16993 case PPC::BI__builtin_ppc_frizs:
16994 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
16995 *this, E, Intrinsic::trunc,
16996 Intrinsic::experimental_constrained_trunc))
16997 .getScalarVal();
16998 case PPC::BI__builtin_ppc_fsqrt:
16999 case PPC::BI__builtin_ppc_fsqrts:
17000 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17001 *this, E, Intrinsic::sqrt,
17002 Intrinsic::experimental_constrained_sqrt))
17003 .getScalarVal();
17004 case PPC::BI__builtin_ppc_test_data_class: {
17005 Value *Op0 = EmitScalarExpr(E->getArg(0));
17006 Value *Op1 = EmitScalarExpr(E->getArg(1));
17007 return Builder.CreateCall(
17008 CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()),
17009 {Op0, Op1}, "test_data_class");
17010 }
17011 case PPC::BI__builtin_ppc_maxfe: {
17012 Value *Op0 = EmitScalarExpr(E->getArg(0));
17013 Value *Op1 = EmitScalarExpr(E->getArg(1));
17014 Value *Op2 = EmitScalarExpr(E->getArg(2));
17015 Value *Op3 = EmitScalarExpr(E->getArg(3));
17016 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe),
17017 {Op0, Op1, Op2, Op3});
17018 }
17019 case PPC::BI__builtin_ppc_maxfl: {
17020 Value *Op0 = EmitScalarExpr(E->getArg(0));
17021 Value *Op1 = EmitScalarExpr(E->getArg(1));
17022 Value *Op2 = EmitScalarExpr(E->getArg(2));
17023 Value *Op3 = EmitScalarExpr(E->getArg(3));
17024 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl),
17025 {Op0, Op1, Op2, Op3});
17026 }
17027 case PPC::BI__builtin_ppc_maxfs: {
17028 Value *Op0 = EmitScalarExpr(E->getArg(0));
17029 Value *Op1 = EmitScalarExpr(E->getArg(1));
17030 Value *Op2 = EmitScalarExpr(E->getArg(2));
17031 Value *Op3 = EmitScalarExpr(E->getArg(3));
17032 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs),
17033 {Op0, Op1, Op2, Op3});
17034 }
17035 case PPC::BI__builtin_ppc_minfe: {
17036 Value *Op0 = EmitScalarExpr(E->getArg(0));
17037 Value *Op1 = EmitScalarExpr(E->getArg(1));
17038 Value *Op2 = EmitScalarExpr(E->getArg(2));
17039 Value *Op3 = EmitScalarExpr(E->getArg(3));
17040 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe),
17041 {Op0, Op1, Op2, Op3});
17042 }
17043 case PPC::BI__builtin_ppc_minfl: {
17044 Value *Op0 = EmitScalarExpr(E->getArg(0));
17045 Value *Op1 = EmitScalarExpr(E->getArg(1));
17046 Value *Op2 = EmitScalarExpr(E->getArg(2));
17047 Value *Op3 = EmitScalarExpr(E->getArg(3));
17048 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl),
17049 {Op0, Op1, Op2, Op3});
17050 }
17051 case PPC::BI__builtin_ppc_minfs: {
17052 Value *Op0 = EmitScalarExpr(E->getArg(0));
17053 Value *Op1 = EmitScalarExpr(E->getArg(1));
17054 Value *Op2 = EmitScalarExpr(E->getArg(2));
17055 Value *Op3 = EmitScalarExpr(E->getArg(3));
17056 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs),
17057 {Op0, Op1, Op2, Op3});
17058 }
17059 case PPC::BI__builtin_ppc_swdiv:
17060 case PPC::BI__builtin_ppc_swdivs: {
17061 Value *Op0 = EmitScalarExpr(E->getArg(0));
17062 Value *Op1 = EmitScalarExpr(E->getArg(1));
17063 return Builder.CreateFDiv(Op0, Op1, "swdiv");
17064 }
17065 }
17066}
17067
17068namespace {
17069 // If \p E is not a null pointer, insert an address space cast to match the
17070 // return type of \p E if necessary.
17071Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
17072 const CallExpr *E = nullptr) {
17073 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
17074 auto *Call = CGF.Builder.CreateCall(F);
17075 Call->addRetAttr(
17076 Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
17077 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
17078 if (!E)
17079 return Call;
17080 QualType BuiltinRetType = E->getType();
17081 auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
17082 if (RetTy == Call->getType())
17083 return Call;
17084 return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
17085}
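// Illustration (not in the original source): with the return attributes
// added above, the emitted call is roughly:
//   %dp = call align 4 dereferenceable(64) ptr addrspace(4)
//             @llvm.amdgcn.dispatch.ptr()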
17086
17087Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
17088 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
17089 auto *Call = CGF.Builder.CreateCall(F);
17090 Call->addRetAttr(
17091 Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
17092 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
17093 return Call;
17094}
17095
17096 // \p Index is 0, 1, and 2 for the x, y, and z dimensions, respectively.
17097Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
17098 bool IsCOV_5 = CGF.getTarget().getTargetOpts().CodeObjectVersion ==
17099 clang::TargetOptions::COV_5;
17100 Constant *Offset;
17101 Value *DP;
17102 if (IsCOV_5) {
17103 // Indexing the implicit kernarg segment.
17104 Offset = llvm::ConstantInt::get(CGF.Int32Ty, 12 + Index * 2);
17105 DP = EmitAMDGPUImplicitArgPtr(CGF);
17106 } else {
17107 // Indexing the HSA kernel_dispatch_packet struct.
17108 Offset = llvm::ConstantInt::get(CGF.Int32Ty, 4 + Index * 2);
17109 DP = EmitAMDGPUDispatchPtr(CGF);
17110 }
17111
17112 auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
17113 auto *LD = CGF.Builder.CreateLoad(
17114 Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
17115 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
17116 llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
17117 APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
17118 LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
17119 LD->setMetadata(llvm::LLVMContext::MD_noundef,
17120 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
17121 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
17122 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
17123 return LD;
17124}
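// Worked example (sketch, not in the original source; code object V5,
// Index = 0, assuming the usual 1024 maximum work-group size):
//   %p   = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
//   %gep = getelementptr i8, ptr addrspace(4) %p, i32 12
//   %sz  = load i16, ptr addrspace(4) %gep, align 2, !range !0, !invariant.load !1, !noundef !1
// with !0 = !{i16 1, i16 1025}.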
17125
17126 // \p Index is 0, 1, and 2 for the x, y, and z dimensions, respectively.
17127Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
17128 const unsigned XOffset = 12;
17129 auto *DP = EmitAMDGPUDispatchPtr(CGF);
17130 // Indexing the HSA kernel_dispatch_packet struct.
17131 auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
17132 auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
17133 auto *LD = CGF.Builder.CreateLoad(
17134 Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));
17135 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
17136 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
17137 return LD;
17138}
17139} // namespace
17140
17141// For processing memory ordering and memory scope arguments of various
17142// amdgcn builtins.
17143 // \p Order takes a C++11-compatible memory-ordering specifier and converts
17144 // it into LLVM's memory ordering specifier using the atomic C ABI, and
17145 // writes it to \p AO. \p Scope takes a const char * and converts it into an
17146 // AMDGCN-specific SyncScopeID and writes it to \p SSID.
17147 void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
17148 llvm::AtomicOrdering &AO,
17149 llvm::SyncScope::ID &SSID) {
17150 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
17151
17152 // Map C11/C++11 memory ordering to LLVM memory ordering
17153 assert(llvm::isValidAtomicOrderingCABI(ord));
17154 switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
17155 case llvm::AtomicOrderingCABI::acquire:
17156 case llvm::AtomicOrderingCABI::consume:
17157 AO = llvm::AtomicOrdering::Acquire;
17158 break;
17159 case llvm::AtomicOrderingCABI::release:
17160 AO = llvm::AtomicOrdering::Release;
17161 break;
17162 case llvm::AtomicOrderingCABI::acq_rel:
17163 AO = llvm::AtomicOrdering::AcquireRelease;
17164 break;
17165 case llvm::AtomicOrderingCABI::seq_cst:
17166 AO = llvm::AtomicOrdering::SequentiallyConsistent;
17167 break;
17168 case llvm::AtomicOrderingCABI::relaxed:
17169 AO = llvm::AtomicOrdering::Monotonic;
17170 break;
17171 }
17172
17173 StringRef scp;
17174 llvm::getConstantStringInfo(Scope, scp);
17175 SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
17176}
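// Usage sketch (not in the original source):
// __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup") flows through
// this helper and yields
//   fence syncscope("workgroup") seq_cst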
17177
17178 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
17179 const CallExpr *E) {
17180 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
17181 llvm::SyncScope::ID SSID;
17182 switch (BuiltinID) {
17183 case AMDGPU::BI__builtin_amdgcn_div_scale:
17184 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
17185 // Translate from the intrinsic's struct return to the builtin's out
17186 // argument.
17187
17188 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
17189
17190 llvm::Value *X = EmitScalarExpr(E->getArg(0));
17191 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
17192 llvm::Value *Z = EmitScalarExpr(E->getArg(2));
17193
17194 llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
17195 X->getType());
17196
17197 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
17198
17199 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
17200 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
17201
17202 llvm::Type *RealFlagType = FlagOutPtr.getElementType();
17203
17204 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
17205 Builder.CreateStore(FlagExt, FlagOutPtr);
17206 return Result;
17207 }
17208 case AMDGPU::BI__builtin_amdgcn_div_fmas:
17209 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
17210 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
17211 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
17212 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
17213 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
17214
17215 llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
17216 Src0->getType());
17217 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
17218 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
17219 }
17220
17221 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
17222 return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
17223 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
17224 return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_mov_dpp8);
17225 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
17226 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
17227 llvm::SmallVector<llvm::Value *, 6> Args;
17228 for (unsigned I = 0; I != E->getNumArgs(); ++I)
17229 Args.push_back(EmitScalarExpr(E->getArg(I)));
17230 assert(Args.size() == 5 || Args.size() == 6);
17231 if (Args.size() == 5)
17232 Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType()));
17233 Function *F =
17234 CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
17235 return Builder.CreateCall(F, Args);
17236 }
17237 case AMDGPU::BI__builtin_amdgcn_div_fixup:
17238 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
17239 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
17240 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
17241 case AMDGPU::BI__builtin_amdgcn_trig_preop:
17242 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
17243 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
17244 case AMDGPU::BI__builtin_amdgcn_rcp:
17245 case AMDGPU::BI__builtin_amdgcn_rcpf:
17246 case AMDGPU::BI__builtin_amdgcn_rcph:
17247 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
17248 case AMDGPU::BI__builtin_amdgcn_sqrt:
17249 case AMDGPU::BI__builtin_amdgcn_sqrtf:
17250 case AMDGPU::BI__builtin_amdgcn_sqrth:
17251 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sqrt);
17252 case AMDGPU::BI__builtin_amdgcn_rsq:
17253 case AMDGPU::BI__builtin_amdgcn_rsqf:
17254 case AMDGPU::BI__builtin_amdgcn_rsqh:
17255 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
17256 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
17257 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
17258 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
17259 case AMDGPU::BI__builtin_amdgcn_sinf:
17260 case AMDGPU::BI__builtin_amdgcn_sinh:
17261 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
17262 case AMDGPU::BI__builtin_amdgcn_cosf:
17263 case AMDGPU::BI__builtin_amdgcn_cosh:
17264 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
17265 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
17266 return EmitAMDGPUDispatchPtr(*this, E);
17267 case AMDGPU::BI__builtin_amdgcn_logf:
17268 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log);
17269 case AMDGPU::BI__builtin_amdgcn_exp2f:
17270 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_exp2);
17271 case AMDGPU::BI__builtin_amdgcn_log_clampf:
17272 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
17273 case AMDGPU::BI__builtin_amdgcn_ldexp:
17274 case AMDGPU::BI__builtin_amdgcn_ldexpf:
17275 case AMDGPU::BI__builtin_amdgcn_ldexph: {
17276 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
17277 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
17278 llvm::Function *F =
17279 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
17280 return Builder.CreateCall(F, {Src0, Src1});
17281 }
17282 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
17283 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
17284 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
17285 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
17286 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
17287 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
17288 Value *Src0 = EmitScalarExpr(E->getArg(0));
17289 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
17290 { Builder.getInt32Ty(), Src0->getType() });
17291 return Builder.CreateCall(F, Src0);
17292 }
17293 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
17294 Value *Src0 = EmitScalarExpr(E->getArg(0));
17295 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
17296 { Builder.getInt16Ty(), Src0->getType() });
17297 return Builder.CreateCall(F, Src0);
17298 }
17299 case AMDGPU::BI__builtin_amdgcn_fract:
17300 case AMDGPU::BI__builtin_amdgcn_fractf:
17301 case AMDGPU::BI__builtin_amdgcn_fracth:
17302 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
17303 case AMDGPU::BI__builtin_amdgcn_lerp:
17304 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
17305 case AMDGPU::BI__builtin_amdgcn_ubfe:
17306 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe);
17307 case AMDGPU::BI__builtin_amdgcn_sbfe:
17308 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe);
17309 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
17310 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
17311 llvm::Type *ResultType = ConvertType(E->getType());
17312 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
17313 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
17314 return Builder.CreateCall(F, { Src });
17315 }
17316 case AMDGPU::BI__builtin_amdgcn_uicmp:
17317 case AMDGPU::BI__builtin_amdgcn_uicmpl:
17318 case AMDGPU::BI__builtin_amdgcn_sicmp:
17319 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
17320 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
17321 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
17322 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
17323
17324 // FIXME-GFX10: How should 32 bit mask be handled?
17325 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
17326 { Builder.getInt64Ty(), Src0->getType() });
17327 return Builder.CreateCall(F, { Src0, Src1, Src2 });
17328 }
17329 case AMDGPU::BI__builtin_amdgcn_fcmp:
17330 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
17331 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
17332 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
17333 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
17334
17335 // FIXME-GFX10: How should 32 bit mask be handled?
17336 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
17337 { Builder.getInt64Ty(), Src0->getType() });
17338 return Builder.CreateCall(F, { Src0, Src1, Src2 });
17339 }
17340 case AMDGPU::BI__builtin_amdgcn_class:
17341 case AMDGPU::BI__builtin_amdgcn_classf:
17342 case AMDGPU::BI__builtin_amdgcn_classh:
17343 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
17344 case AMDGPU::BI__builtin_amdgcn_fmed3f:
17345 case AMDGPU::BI__builtin_amdgcn_fmed3h:
17346 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
17347 case AMDGPU::BI__builtin_amdgcn_ds_append:
17348 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
17349 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
17350 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
17351 Value *Src0 = EmitScalarExpr(E->getArg(0));
17352 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
17353 return Builder.CreateCall(F, { Src0, Builder.getFalse() });
17354 }
17355 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
17356 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
17357 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
17358 Intrinsic::ID Intrin;
17359 switch (BuiltinID) {
17360 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
17361 Intrin = Intrinsic::amdgcn_ds_fadd;
17362 break;
17363 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
17364 Intrin = Intrinsic::amdgcn_ds_fmin;
17365 break;
17366 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
17367 Intrin = Intrinsic::amdgcn_ds_fmax;
17368 break;
17369 }
17370 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
17371 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
17372 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
17373 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
17374 llvm::Value *Src4 = EmitScalarExpr(E->getArg(4));
17375 llvm::Function *F = CGM.getIntrinsic(Intrin, { Src1->getType() });
17376 llvm::FunctionType *FTy = F->getFunctionType();
17377 llvm::Type *PTy = FTy->getParamType(0);
17378 Src0 = Builder.CreatePointerBitCastOrAddrSpaceCast(Src0, PTy);
17379 return Builder.CreateCall(F, { Src0, Src1, Src2, Src3, Src4 });
17380 }
17381 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
17382 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
17383 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
17384 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
17385 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
17386 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
17387 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
17388 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
17389 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
17390 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: {
17391 Intrinsic::ID IID;
17392 llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
17393 switch (BuiltinID) {
17394 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
17395 ArgTy = llvm::Type::getFloatTy(getLLVMContext());
17396 IID = Intrinsic::amdgcn_global_atomic_fadd;
17397 break;
17398 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
17399 ArgTy = llvm::FixedVectorType::get(
17400 llvm::Type::getHalfTy(getLLVMContext()), 2);
17401 IID = Intrinsic::amdgcn_global_atomic_fadd;
17402 break;
17403 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
17404 IID = Intrinsic::amdgcn_global_atomic_fadd;
17405 break;
17406 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
17407 IID = Intrinsic::amdgcn_global_atomic_fmin;
17408 break;
17409 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
17410 IID = Intrinsic::amdgcn_global_atomic_fmax;
17411 break;
17412 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
17413 IID = Intrinsic::amdgcn_flat_atomic_fadd;
17414 break;
17415 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
17416 IID = Intrinsic::amdgcn_flat_atomic_fmin;
17417 break;
17418 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
17419 IID = Intrinsic::amdgcn_flat_atomic_fmax;
17420 break;
17421 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
17422 ArgTy = llvm::Type::getFloatTy(getLLVMContext());
17423 IID = Intrinsic::amdgcn_flat_atomic_fadd;
17424 break;
17425 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
17426 ArgTy = llvm::FixedVectorType::get(
17427 llvm::Type::getHalfTy(getLLVMContext()), 2);
17428 IID = Intrinsic::amdgcn_flat_atomic_fadd;
17429 break;
17430 }
17431 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
17432 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
17433 llvm::Function *F =
17434 CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
17435 return Builder.CreateCall(F, {Addr, Val});
17436 }
17437 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
17438 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
17439 Intrinsic::ID IID;
17440 switch (BuiltinID) {
17441 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
17442 IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16;
17443 break;
17444 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
17445 IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16;
17446 break;
17447 }
17448 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
17449 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
17450 llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()});
17451 return Builder.CreateCall(F, {Addr, Val});
17452 }
17453 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
17454 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
17455 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: {
17456 Intrinsic::ID IID;
17457 llvm::Type *ArgTy;
17458 switch (BuiltinID) {
17459 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
17460 ArgTy = llvm::Type::getFloatTy(getLLVMContext());
17461 IID = Intrinsic::amdgcn_ds_fadd;
17462 break;
17463 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
17464 ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
17465 IID = Intrinsic::amdgcn_ds_fadd;
17466 break;
17467 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
17468 ArgTy = llvm::FixedVectorType::get(
17469 llvm::Type::getHalfTy(getLLVMContext()), 2);
17470 IID = Intrinsic::amdgcn_ds_fadd;
17471 break;
17472 }
17473 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
17474 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
17475 llvm::Constant *ZeroI32 = llvm::ConstantInt::getIntegerValue(
17476 llvm::Type::getInt32Ty(getLLVMContext()), APInt(32, 0, true));
17477 llvm::Constant *ZeroI1 = llvm::ConstantInt::getIntegerValue(
17478 llvm::Type::getInt1Ty(getLLVMContext()), APInt(1, 0));
17479 llvm::Function *F = CGM.getIntrinsic(IID, {ArgTy});
17480 return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
17481 }
17482 case AMDGPU::BI__builtin_amdgcn_read_exec: {
17483 CallInst *CI = cast<CallInst>(
17484 EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, NormalRead, "exec"));
17485 CI->setConvergent();
17486 return CI;
17487 }
17488 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
17489 case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
17490 StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ?
17491 "exec_lo" : "exec_hi";
17492 CallInst *CI = cast<CallInst>(
17493 EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, NormalRead, RegName));
17494 CI->setConvergent();
17495 return CI;
17496 }
17497 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
17498 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
17499 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
17500 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
17501 llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
17502 llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
17503 llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
17504 llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
17505 llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
17506 llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
17507
17508 // The builtins take these arguments as vec4 where the last element is
17509 // ignored. The intrinsic takes them as vec3.
17510 RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
17511 ArrayRef<int>{0, 1, 2});
17512 RayDir =
17513 Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});
17514 RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
17515 ArrayRef<int>{0, 1, 2});
17516
17517 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
17518 {NodePtr->getType(), RayDir->getType()});
17519 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
17520 RayInverseDir, TextureDescr});
17521 }
17522
17523 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
17524 SmallVector<Value *, 4> Args;
17525 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
17526 Args.push_back(EmitScalarExpr(E->getArg(i)));
17527
17528 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);
17529 Value *Call = Builder.CreateCall(F, Args);
17530 Value *Rtn = Builder.CreateExtractValue(Call, 0);
17531 Value *A = Builder.CreateExtractValue(Call, 1);
17532 llvm::Type *RetTy = ConvertType(E->getType());
17533 Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
17534 (uint64_t)0);
17535 return Builder.CreateInsertElement(I0, A, 1);
17536 }
17537
17538 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
17539 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
17540 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
17541 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
17542 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
17543 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
17544 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
17545 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
17546 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
17547 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
17548 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
17549 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64: {
17550
17551 // These operations perform a matrix multiplication and accumulation of
17552 // the form:
17553 // D = A * B + C
17554 // The return type always matches the type of matrix C.
17555 unsigned ArgForMatchingRetType;
17556 unsigned BuiltinWMMAOp;
17557
17558 switch (BuiltinID) {
17559 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
17560 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
17561 ArgForMatchingRetType = 2;
17562 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
17563 break;
17564 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
17565 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
17566 ArgForMatchingRetType = 2;
17567 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
17568 break;
17569 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
17570 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
17571 ArgForMatchingRetType = 2;
17572 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
17573 break;
17574 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
17575 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
17576 ArgForMatchingRetType = 2;
17577 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
17578 break;
17579 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
17580 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
17581 ArgForMatchingRetType = 4;
17582 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
17583 break;
17584 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
17585 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
17586 ArgForMatchingRetType = 4;
17587 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
17588 break;
17589 }
17590
17591 SmallVector<Value *, 6> Args;
17592 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
17593 Args.push_back(EmitScalarExpr(E->getArg(i)));
17594
17595 Function *F = CGM.getIntrinsic(BuiltinWMMAOp,
17596 {Args[ArgForMatchingRetType]->getType()});
17597
17598 return Builder.CreateCall(F, Args);
17599 }
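// Illustration (sketch, not in the original source; wave32 f32/f16
// variant): the intrinsic is mangled on the type of matrix C, which the
// call also returns:
//   %d = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32(
//            <16 x half> %a, <16 x half> %b, <8 x float> %c)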
17600
17601 // amdgcn workitem
17602 case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
17603 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
17604 case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
17605 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
17606 case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
17607 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
17608
17609 // amdgcn workgroup size
17610 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
17611 return EmitAMDGPUWorkGroupSize(*this, 0);
17612 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
17613 return EmitAMDGPUWorkGroupSize(*this, 1);
17614 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
17615 return EmitAMDGPUWorkGroupSize(*this, 2);
17616
17617 // amdgcn grid size
17618 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
17619 return EmitAMDGPUGridSize(*this, 0);
17620 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
17621 return EmitAMDGPUGridSize(*this, 1);
17622 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
17623 return EmitAMDGPUGridSize(*this, 2);
17624
17625 // r600 intrinsics
17626 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
17627 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
17628 return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
17629 case AMDGPU::BI__builtin_r600_read_tidig_x:
17630 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
17631 case AMDGPU::BI__builtin_r600_read_tidig_y:
17632 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
17633 case AMDGPU::BI__builtin_r600_read_tidig_z:
17634 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
17635 case AMDGPU::BI__builtin_amdgcn_alignbit: {
17636 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
17637 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
17638 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
17639 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
17640 return Builder.CreateCall(F, { Src0, Src1, Src2 });
17641 }
17642 case AMDGPU::BI__builtin_amdgcn_fence: {
17643 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
17644 EmitScalarExpr(E->getArg(1)), AO, SSID);
17645 return Builder.CreateFence(AO, SSID);
17646 }
17647 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
17648 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
17649 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
17650 case AMDGPU::BI__builtin_amdgcn_atomic_dec64: {
17651 llvm::AtomicRMWInst::BinOp BinOp;
17652 switch (BuiltinID) {
17653 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
17654 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
17655 BinOp = llvm::AtomicRMWInst::UIncWrap;
17656 break;
17657 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
17658 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
17659 BinOp = llvm::AtomicRMWInst::UDecWrap;
17660 break;
17661 }
17662
17663 Value *Ptr = EmitScalarExpr(E->getArg(0));
17664 Value *Val = EmitScalarExpr(E->getArg(1));
17665
17666 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
17667 EmitScalarExpr(E->getArg(3)), AO, SSID);
17668
17669 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
17670 bool Volatile =
17671 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
17672
17673 llvm::AtomicRMWInst *RMW =
17674 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
17675 if (Volatile)
17676 RMW->setVolatile(true);
17677 return RMW;
17678 }
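// Illustration (sketch, not in the original source; relaxed ordering and
// "agent" scope assumed): atomic_inc32 becomes
//   %old = atomicrmw uinc_wrap ptr %p, i32 %v syncscope("agent") monotonic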
17679 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
17680 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
17681 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
17682 llvm::Type *ResultType = ConvertType(E->getType());
17683 // s_sendmsg_rtn is mangled using return type only.
17684 Function *F =
17685 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
17686 return Builder.CreateCall(F, {Arg});
17687 }
17688 default:
17689 return nullptr;
17690 }
17691}
17692
17693/// Handle a SystemZ function in which the final argument is a pointer
17694/// to an int that receives the post-instruction CC value. At the LLVM level
17695/// this is represented as a function that returns a {result, cc} pair.
17696 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
17697 unsigned IntrinsicID,
17698 const CallExpr *E) {
17699 unsigned NumArgs = E->getNumArgs() - 1;
17700 SmallVector<Value *, 8> Args(NumArgs);
17701 for (unsigned I = 0; I < NumArgs; ++I)
17702 Args[I] = CGF.EmitScalarExpr(E->getArg(I));
17703 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
17704 Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
17705 Value *Call = CGF.Builder.CreateCall(F, Args);
17706 Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
17707 CGF.Builder.CreateStore(CC, CCPtr);
17708 return CGF.Builder.CreateExtractValue(Call, 0);
17709}
17710
17711 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
17712 const CallExpr *E) {
17713 switch (BuiltinID) {
17714 case SystemZ::BI__builtin_tbegin: {
17715 Value *TDB = EmitScalarExpr(E->getArg(0));
17716 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
17717 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
17718 return Builder.CreateCall(F, {TDB, Control});
17719 }
17720 case SystemZ::BI__builtin_tbegin_nofloat: {
17721 Value *TDB = EmitScalarExpr(E->getArg(0));
17722 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
17723 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
17724 return Builder.CreateCall(F, {TDB, Control});
17725 }
17726 case SystemZ::BI__builtin_tbeginc: {
17727 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
17728 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
17729 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
17730 return Builder.CreateCall(F, {TDB, Control});
17731 }
17732 case SystemZ::BI__builtin_tabort: {
17733 Value *Data = EmitScalarExpr(E->getArg(0));
17734 Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
17735 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
17736 }
17737 case SystemZ::BI__builtin_non_tx_store: {
17738 Value *Address = EmitScalarExpr(E->getArg(0));
17739 Value *Data = EmitScalarExpr(E->getArg(1));
17740 Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
17741 return Builder.CreateCall(F, {Data, Address});
17742 }
17743
17744 // Vector builtins. Note that most vector builtins are mapped automatically
17745 // to target-specific LLVM intrinsics. The ones handled specially here can
17746 // be represented via standard LLVM IR, which is preferable since it enables
17747 // common LLVM optimizations.
17748
17749 case SystemZ::BI__builtin_s390_vpopctb:
17750 case SystemZ::BI__builtin_s390_vpopcth:
17751 case SystemZ::BI__builtin_s390_vpopctf:
17752 case SystemZ::BI__builtin_s390_vpopctg: {
17753 llvm::Type *ResultType = ConvertType(E->getType());
17754 Value *X = EmitScalarExpr(E->getArg(0));
17755 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
17756 return Builder.CreateCall(F, X);
17757 }
17758
17759 case SystemZ::BI__builtin_s390_vclzb:
17760 case SystemZ::BI__builtin_s390_vclzh:
17761 case SystemZ::BI__builtin_s390_vclzf:
17762 case SystemZ::BI__builtin_s390_vclzg: {
17763 llvm::Type *ResultType = ConvertType(E->getType());
17764 Value *X = EmitScalarExpr(E->getArg(0));
17765 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
17766 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
17767 return Builder.CreateCall(F, {X, Undef});
17768 }
17769
17770 case SystemZ::BI__builtin_s390_vctzb:
17771 case SystemZ::BI__builtin_s390_vctzh:
17772 case SystemZ::BI__builtin_s390_vctzf:
17773 case SystemZ::BI__builtin_s390_vctzg: {
17774 llvm::Type *ResultType = ConvertType(E->getType());
17775 Value *X = EmitScalarExpr(E->getArg(0));
17776 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
17777 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
17778 return Builder.CreateCall(F, {X, Undef});
17779 }
17780
17781 case SystemZ::BI__builtin_s390_vfsqsb:
17782 case SystemZ::BI__builtin_s390_vfsqdb: {
17783 llvm::Type *ResultType = ConvertType(E->getType());
17784 Value *X = EmitScalarExpr(E->getArg(0));
17785 if (Builder.getIsFPConstrained()) {
17786 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);
17787 return Builder.CreateConstrainedFPCall(F, { X });
17788 } else {
17789 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
17790 return Builder.CreateCall(F, X);
17791 }
17792 }
17793 case SystemZ::BI__builtin_s390_vfmasb:
17794 case SystemZ::BI__builtin_s390_vfmadb: {
17795 llvm::Type *ResultType = ConvertType(E->getType());
17796 Value *X = EmitScalarExpr(E->getArg(0));
17797 Value *Y = EmitScalarExpr(E->getArg(1));
17798 Value *Z = EmitScalarExpr(E->getArg(2));
17799 if (Builder.getIsFPConstrained()) {
17800 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
17801 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
17802 } else {
17803 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
17804 return Builder.CreateCall(F, {X, Y, Z});
17805 }
17806 }
17807 case SystemZ::BI__builtin_s390_vfmssb:
17808 case SystemZ::BI__builtin_s390_vfmsdb: {
17809 llvm::Type *ResultType = ConvertType(E->getType());
17810 Value *X = EmitScalarExpr(E->getArg(0));
17811 Value *Y = EmitScalarExpr(E->getArg(1));
17812 Value *Z = EmitScalarExpr(E->getArg(2));
17813 if (Builder.getIsFPConstrained()) {
17814 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
17815 return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
17816 } else {
17817 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
17818 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
17819 }
17820 }
17821 case SystemZ::BI__builtin_s390_vfnmasb:
17822 case SystemZ::BI__builtin_s390_vfnmadb: {
17823 llvm::Type *ResultType = ConvertType(E->getType());
17824 Value *X = EmitScalarExpr(E->getArg(0));
17825 Value *Y = EmitScalarExpr(E->getArg(1));
17826 Value *Z = EmitScalarExpr(E->getArg(2));
17827 if (Builder.getIsFPConstrained()) {
17828 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
17829 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
17830 } else {
17831 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
17832 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
17833 }
17834 }
17835 case SystemZ::BI__builtin_s390_vfnmssb:
17836 case SystemZ::BI__builtin_s390_vfnmsdb: {
17837 llvm::Type *ResultType = ConvertType(E->getType());
17838 Value *X = EmitScalarExpr(E->getArg(0));
17839 Value *Y = EmitScalarExpr(E->getArg(1));
17840 Value *Z = EmitScalarExpr(E->getArg(2));
17841 if (Builder.getIsFPConstrained()) {
17842 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
17843 Value *NegZ = Builder.CreateFNeg(Z, "sub");
17844 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
17845 } else {
17846 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
17847 Value *NegZ = Builder.CreateFNeg(Z, "neg");
17848 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
17849 }
17850 }
17851 case SystemZ::BI__builtin_s390_vflpsb:
17852 case SystemZ::BI__builtin_s390_vflpdb: {
17853 llvm::Type *ResultType = ConvertType(E->getType());
17854 Value *X = EmitScalarExpr(E->getArg(0));
17855 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
17856 return Builder.CreateCall(F, X);
17857 }
17858 case SystemZ::BI__builtin_s390_vflnsb:
17859 case SystemZ::BI__builtin_s390_vflndb: {
17860 llvm::Type *ResultType = ConvertType(E->getType());
17861 Value *X = EmitScalarExpr(E->getArg(0));
17862 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
17863 return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");
17864 }
17865 case SystemZ::BI__builtin_s390_vfisb:
17866 case SystemZ::BI__builtin_s390_vfidb: {
17867 llvm::Type *ResultType = ConvertType(E->getType());
17868 Value *X = EmitScalarExpr(E->getArg(0));
17869 // Constant-fold the M4 and M5 mask arguments.
17870 llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext());
17871 llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext());
17872 // Check whether this instance can be represented via an LLVM standard
17873 // intrinsic. We only support some combinations of M4 and M5.
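// M4 == 0 (inexact allowed): M5 == 0 maps to rint.
// M4 == 4 (inexact suppressed): M5 == 0/1/5/6/7 map to nearbyint, round,
// trunc, ceil, and floor, respectively.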
17874 Intrinsic::ID ID = Intrinsic::not_intrinsic;
17875 Intrinsic::ID CI;
17876 switch (M4.getZExtValue()) {
17877 default: break;
17878 case 0: // IEEE-inexact exception allowed
17879 switch (M5.getZExtValue()) {
17880 default: break;
17881 case 0: ID = Intrinsic::rint;
17882 CI = Intrinsic::experimental_constrained_rint; break;
17883 }
17884 break;
17885 case 4: // IEEE-inexact exception suppressed
17886 switch (M5.getZExtValue()) {
17887 default: break;
17888 case 0: ID = Intrinsic::nearbyint;
17889 CI = Intrinsic::experimental_constrained_nearbyint; break;
17890 case 1: ID = Intrinsic::round;
17891 CI = Intrinsic::experimental_constrained_round; break;
17892 case 5: ID = Intrinsic::trunc;
17893 CI = Intrinsic::experimental_constrained_trunc; break;
17894 case 6: ID = Intrinsic::ceil;
17895 CI = Intrinsic::experimental_constrained_ceil; break;
17896 case 7: ID = Intrinsic::floor;
17897 CI = Intrinsic::experimental_constrained_floor; break;
17898 }
17899 break;
17900 }
17901 if (ID != Intrinsic::not_intrinsic) {
17902 if (Builder.getIsFPConstrained()) {
17903 Function *F = CGM.getIntrinsic(CI, ResultType);
17904 return Builder.CreateConstrainedFPCall(F, X);
17905 } else {
17906 Function *F = CGM.getIntrinsic(ID, ResultType);
17907 return Builder.CreateCall(F, X);
17908 }
17909 }
17910 switch (BuiltinID) { // FIXME: constrained version?
17911 case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
17912 case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
17913 default: llvm_unreachable("Unknown BuiltinID");
17914 }
17915 Function *F = CGM.getIntrinsic(ID);
17916 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
17917 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
17918 return Builder.CreateCall(F, {X, M4Value, M5Value});
17919 }
17920 case SystemZ::BI__builtin_s390_vfmaxsb:
17921 case SystemZ::BI__builtin_s390_vfmaxdb: {
17922 llvm::Type *ResultType = ConvertType(E->getType());
17923 Value *X = EmitScalarExpr(E->getArg(0));
17924 Value *Y = EmitScalarExpr(E->getArg(1));
17925 // Constant-fold the M4 mask argument.
17926 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
17927 // Check whether this instance can be represented via an LLVM standard
17928 // intrinsic. We only support some values of M4.
17929 Intrinsic::ID ID = Intrinsic::not_intrinsic;
17930 Intrinsic::ID CI;
17931 switch (M4.getZExtValue()) {
17932 default: break;
17933 case 4: ID = Intrinsic::maxnum;
17934 CI = Intrinsic::experimental_constrained_maxnum; break;
17935 }
17936 if (ID != Intrinsic::not_intrinsic) {
17937 if (Builder.getIsFPConstrained()) {
17938 Function *F = CGM.getIntrinsic(CI, ResultType);
17939 return Builder.CreateConstrainedFPCall(F, {X, Y});
17940 } else {
17941 Function *F = CGM.getIntrinsic(ID, ResultType);
17942 return Builder.CreateCall(F, {X, Y});
17943 }
17944 }
17945 switch (BuiltinID) {
17946 case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
17947 case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
17948 default: llvm_unreachable("Unknown BuiltinID");
17949 }
17950 Function *F = CGM.getIntrinsic(ID);
17951 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
17952 return Builder.CreateCall(F, {X, Y, M4Value});
17953 }
17954 case SystemZ::BI__builtin_s390_vfminsb:
17955 case SystemZ::BI__builtin_s390_vfmindb: {
17956 llvm::Type *ResultType = ConvertType(E->getType());
17957 Value *X = EmitScalarExpr(E->getArg(0));
17958 Value *Y = EmitScalarExpr(E->getArg(1));
17959 // Constant-fold the M4 mask argument.
17960 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
17961 // Check whether this instance can be represented via an LLVM standard
17962 // intrinsic. We only support some values of M4.
17963 Intrinsic::ID ID = Intrinsic::not_intrinsic;
17964 Intrinsic::ID CI;
17965 switch (M4.getZExtValue()) {
17966 default: break;
17967 case 4: ID = Intrinsic::minnum;
17968 CI = Intrinsic::experimental_constrained_minnum; break;
17969 }
17970 if (ID != Intrinsic::not_intrinsic) {
17971 if (Builder.getIsFPConstrained()) {
17972 Function *F = CGM.getIntrinsic(CI, ResultType);
17973 return Builder.CreateConstrainedFPCall(F, {X, Y});
17974 } else {
17975 Function *F = CGM.getIntrinsic(ID, ResultType);
17976 return Builder.CreateCall(F, {X, Y});
17977 }
17978 }
17979 switch (BuiltinID) {
17980 case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
17981 case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
17982 default: llvm_unreachable("Unknown BuiltinID");
17983 }
17984 Function *F = CGM.getIntrinsic(ID);
17985 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
17986 return Builder.CreateCall(F, {X, Y, M4Value});
17987 }
17988
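// The vector byte-reverse builtins map directly to the generic bswap
// intrinsic on the corresponding element type.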
17989 case SystemZ::BI__builtin_s390_vlbrh:
17990 case SystemZ::BI__builtin_s390_vlbrf:
17991 case SystemZ::BI__builtin_s390_vlbrg: {
17992 llvm::Type *ResultType = ConvertType(E->getType());
17993 Value *X = EmitScalarExpr(E->getArg(0));
17994 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
17995 return Builder.CreateCall(F, X);
17996 }
17997
17998 // Vector intrinsics that output the post-instruction CC value.
17999
18000#define INTRINSIC_WITH_CC(NAME) \
18001 case SystemZ::BI__builtin_##NAME: \
18002 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
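// For example, INTRINSIC_WITH_CC(s390_vpkshs) expands to a case that emits
// Intrinsic::s390_vpkshs through EmitSystemZIntrinsicWithCC above.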
18003
18004 INTRINSIC_WITH_CC(s390_vpkshs);
18005 INTRINSIC_WITH_CC(s390_vpksfs);
18006 INTRINSIC_WITH_CC(s390_vpksgs);
18007
18008 INTRINSIC_WITH_CC(s390_vpklshs);
18009 INTRINSIC_WITH_CC(s390_vpklsfs);
18010 INTRINSIC_WITH_CC(s390_vpklsgs);
18011
18012 INTRINSIC_WITH_CC(s390_vceqbs);
18013 INTRINSIC_WITH_CC(s390_vceqhs);
18014 INTRINSIC_WITH_CC(s390_vceqfs);
18015 INTRINSIC_WITH_CC(s390_vceqgs);
18016
18017 INTRINSIC_WITH_CC(s390_vchbs);
18018 INTRINSIC_WITH_CC(s390_vchhs);
18019 INTRINSIC_WITH_CC(s390_vchfs);
18020 INTRINSIC_WITH_CC(s390_vchgs);
18021
18022 INTRINSIC_WITH_CC(s390_vchlbs);
18023 INTRINSIC_WITH_CC(s390_vchlhs);
18024 INTRINSIC_WITH_CC(s390_vchlfs);
18025 INTRINSIC_WITH_CC(s390_vchlgs);
18026
18027 INTRINSIC_WITH_CC(s390_vfaebs);
18028 INTRINSIC_WITH_CC(s390_vfaehs);
18029 INTRINSIC_WITH_CC(s390_vfaefs);
18030
18031 INTRINSIC_WITH_CC(s390_vfaezbs);
18032 INTRINSIC_WITH_CC(s390_vfaezhs);
18033 INTRINSIC_WITH_CC(s390_vfaezfs);
18034
18035 INTRINSIC_WITH_CC(s390_vfeebs);
18036 INTRINSIC_WITH_CC(s390_vfeehs);
18037 INTRINSIC_WITH_CC(s390_vfeefs);
18038
18039 INTRINSIC_WITH_CC(s390_vfeezbs);
18040 INTRINSIC_WITH_CC(s390_vfeezhs);
18041 INTRINSIC_WITH_CC(s390_vfeezfs);
18042
18043 INTRINSIC_WITH_CC(s390_vfenebs);
18044 INTRINSIC_WITH_CC(s390_vfenehs);
18045 INTRINSIC_WITH_CC(s390_vfenefs);
18046
18047 INTRINSIC_WITH_CC(s390_vfenezbs);
18048 INTRINSIC_WITH_CC(s390_vfenezhs);
18049 INTRINSIC_WITH_CC(s390_vfenezfs);
18050
18051 INTRINSIC_WITH_CC(s390_vistrbs);
18052 INTRINSIC_WITH_CC(s390_vistrhs);
18053 INTRINSIC_WITH_CC(s390_vistrfs);
18054
18055 INTRINSIC_WITH_CC(s390_vstrcbs);
18056 INTRINSIC_WITH_CC(s390_vstrchs);
18057 INTRINSIC_WITH_CC(s390_vstrcfs);
18058
18059 INTRINSIC_WITH_CC(s390_vstrczbs);
18060 INTRINSIC_WITH_CC(s390_vstrczhs);
18061 INTRINSIC_WITH_CC(s390_vstrczfs);
18062
18063 INTRINSIC_WITH_CC(s390_vfcesbs);
18064 INTRINSIC_WITH_CC(s390_vfcedbs);
18065 INTRINSIC_WITH_CC(s390_vfchsbs);
18066 INTRINSIC_WITH_CC(s390_vfchdbs);
18067 INTRINSIC_WITH_CC(s390_vfchesbs);
18068 INTRINSIC_WITH_CC(s390_vfchedbs);
18069
18070 INTRINSIC_WITH_CC(s390_vftcisb);
18071 INTRINSIC_WITH_CC(s390_vftcidb);
18072
18073 INTRINSIC_WITH_CC(s390_vstrsb);
18074 INTRINSIC_WITH_CC(s390_vstrsh);
18075 INTRINSIC_WITH_CC(s390_vstrsf);
18076
18077 INTRINSIC_WITH_CC(s390_vstrszb);
18078 INTRINSIC_WITH_CC(s390_vstrszh);
18079 INTRINSIC_WITH_CC(s390_vstrszf);
18080
18081#undef INTRINSIC_WITH_CC
18082
18083 default:
18084 return nullptr;
18085 }
18086}
18087
18088namespace {
18089// Helper classes for mapping MMA builtins to particular LLVM intrinsic variants.
18090struct NVPTXMmaLdstInfo {
18091 unsigned NumResults; // Number of elements to load/store
18092 // Intrinsic IDs for row/col variants. 0 if the layout is unsupported.
18093 unsigned IID_col;
18094 unsigned IID_row;
18095};
18096
18097#define MMA_INTR(geom_op_type, layout) \
18098 Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
18099#define MMA_LDST(n, geom_op_type) \
18100 { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
18101
18102static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
18103 switch (BuiltinID) {
18104 // FP MMA loads
18105 case NVPTX::BI__hmma_m16n16k16_ld_a:
18106 return MMA_LDST(8, m16n16k16_load_a_f16);
18107 case NVPTX::BI__hmma_m16n16k16_ld_b:
18108 return MMA_LDST(8, m16n16k16_load_b_f16);
18109 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
18110 return MMA_LDST(4, m16n16k16_load_c_f16);
18111 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
18112 return MMA_LDST(8, m16n16k16_load_c_f32);
18113 case NVPTX::BI__hmma_m32n8k16_ld_a:
18114 return MMA_LDST(8, m32n8k16_load_a_f16);
18115 case NVPTX::BI__hmma_m32n8k16_ld_b:
18116 return MMA_LDST(8, m32n8k16_load_b_f16);
18117 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
18118 return MMA_LDST(4, m32n8k16_load_c_f16);
18119 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
18120 return MMA_LDST(8, m32n8k16_load_c_f32);
18121 case NVPTX::BI__hmma_m8n32k16_ld_a:
18122 return MMA_LDST(8, m8n32k16_load_a_f16);
18123 case NVPTX::BI__hmma_m8n32k16_ld_b:
18124 return MMA_LDST(8, m8n32k16_load_b_f16);
18125 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
18126 return MMA_LDST(4, m8n32k16_load_c_f16);
18127 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
18128 return MMA_LDST(8, m8n32k16_load_c_f32);
18129
18130 // Integer MMA loads
18131 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
18132 return MMA_LDST(2, m16n16k16_load_a_s8);
18133 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
18134 return MMA_LDST(2, m16n16k16_load_a_u8);
18135 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
18136 return MMA_LDST(2, m16n16k16_load_b_s8);
18137 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
18138 return MMA_LDST(2, m16n16k16_load_b_u8);
18139 case NVPTX::BI__imma_m16n16k16_ld_c:
18140 return MMA_LDST(8, m16n16k16_load_c_s32);
18141 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
18142 return MMA_LDST(4, m32n8k16_load_a_s8);
18143 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
18144 return MMA_LDST(4, m32n8k16_load_a_u8);
18145 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
18146 return MMA_LDST(1, m32n8k16_load_b_s8);
18147 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
18148 return MMA_LDST(1, m32n8k16_load_b_u8);
18149 case NVPTX::BI__imma_m32n8k16_ld_c:
18150 return MMA_LDST(8, m32n8k16_load_c_s32);
18151 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
18152 return MMA_LDST(1, m8n32k16_load_a_s8);
18153 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
18154 return MMA_LDST(1, m8n32k16_load_a_u8);
18155 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
18156 return MMA_LDST(4, m8n32k16_load_b_s8);
18157 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
18158 return MMA_LDST(4, m8n32k16_load_b_u8);
18159 case NVPTX::BI__imma_m8n32k16_ld_c:
18160 return MMA_LDST(8, m8n32k16_load_c_s32);
18161
18162 // Sub-integer MMA loads.
18163 // Only row/col layout is supported by A/B fragments.
18164 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
18165 return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
18166 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
18167 return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
18168 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
18169 return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
18170 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
18171 return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
18172 case NVPTX::BI__imma_m8n8k32_ld_c:
18173 return MMA_LDST(2, m8n8k32_load_c_s32);
18174 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
18175 return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
18176 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
18177 return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
18178 case NVPTX::BI__bmma_m8n8k128_ld_c:
18179 return MMA_LDST(2, m8n8k128_load_c_s32);
18180
18181 // Double MMA loads
18182 case NVPTX::BI__dmma_m8n8k4_ld_a:
18183 return MMA_LDST(1, m8n8k4_load_a_f64);
18184 case NVPTX::BI__dmma_m8n8k4_ld_b:
18185 return MMA_LDST(1, m8n8k4_load_b_f64);
18186 case NVPTX::BI__dmma_m8n8k4_ld_c:
18187 return MMA_LDST(2, m8n8k4_load_c_f64);
18188
18189 // Alternate float MMA loads
18190 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
18191 return MMA_LDST(4, m16n16k16_load_a_bf16);
18192 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
18193 return MMA_LDST(4, m16n16k16_load_b_bf16);
18194 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
18195 return MMA_LDST(2, m8n32k16_load_a_bf16);
18196 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
18197 return MMA_LDST(8, m8n32k16_load_b_bf16);
18198 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
18199 return MMA_LDST(8, m32n8k16_load_a_bf16);
18200 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
18201 return MMA_LDST(2, m32n8k16_load_b_bf16);
18202 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
18203 return MMA_LDST(4, m16n16k8_load_a_tf32);
18204 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
18205 return MMA_LDST(4, m16n16k8_load_b_tf32);
18206 case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
18207 return MMA_LDST(8, m16n16k8_load_c_f32);
18208
18209 // NOTE: We need to follow the inconsistent naming scheme used by NVCC. Unlike
18210 // PTX and LLVM IR where stores always use fragment D, NVCC builtins always
18211 // use fragment C for both loads and stores.
18212 // FP MMA stores.
18213 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
18214 return MMA_LDST(4, m16n16k16_store_d_f16);
18215 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
18216 return MMA_LDST(8, m16n16k16_store_d_f32);
18217 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
18218 return MMA_LDST(4, m32n8k16_store_d_f16);
18219 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
18220 return MMA_LDST(8, m32n8k16_store_d_f32);
18221 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
18222 return MMA_LDST(4, m8n32k16_store_d_f16);
18223 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
18224 return MMA_LDST(8, m8n32k16_store_d_f32);
18225
18226 // Integer and sub-integer MMA stores.
18227 // Another naming quirk. Unlike other MMA builtins that use PTX types in the
18228 // name, integer loads/stores use LLVM's i32.
18229 case NVPTX::BI__imma_m16n16k16_st_c_i32:
18230 return MMA_LDST(8, m16n16k16_store_d_s32);
18231 case NVPTX::BI__imma_m32n8k16_st_c_i32:
18232 return MMA_LDST(8, m32n8k16_store_d_s32);
18233 case NVPTX::BI__imma_m8n32k16_st_c_i32:
18234 return MMA_LDST(8, m8n32k16_store_d_s32);
18235 case NVPTX::BI__imma_m8n8k32_st_c_i32:
18236 return MMA_LDST(2, m8n8k32_store_d_s32);
18237 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
18238 return MMA_LDST(2, m8n8k128_store_d_s32);
18239
18240 // Double MMA store
18241 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
18242 return MMA_LDST(2, m8n8k4_store_d_f64);
18243
18244 // Alternate float MMA store
18245 case NVPTX::BI__mma_m16n16k8_st_c_f32:
18246 return MMA_LDST(8, m16n16k8_store_d_f32);
18247
18248 default:
18249 llvm_unreachable("Unknown MMA builtin");
18250 }
18251}
18252#undef MMA_LDST
18253#undef MMA_INTR
18254
18255
18256struct NVPTXMmaInfo {
18257 unsigned NumEltsA;
18258 unsigned NumEltsB;
18259 unsigned NumEltsC;
18260 unsigned NumEltsD;
18261
18262 // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
18263 // over 'col' for layout. The index of non-satf variants is expected to match
18264 // the undocumented layout constants used by CUDA's mma.hpp.
18265 std::array<unsigned, 8> Variants;
18266
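// Layout encodes (layout_a, layout_b) as row/row=0, row/col=1, col/row=2,
// col/col=3; Satf selects the second half of the table.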
18267 unsigned getMMAIntrinsic(int Layout, bool Satf) {
18268 unsigned Index = Layout + 4 * Satf;
18269 if (Index >= Variants.size())
18270 return 0;
18271 return Variants[Index];
18272 }
18273};
18274
18275 // Returns an intrinsic that matches Layout and Satf for valid combinations of
18276 // Layout and Satf, 0 otherwise.
18277static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
18278 // clang-format off
18279#define MMA_VARIANTS(geom, type) \
18280 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
18281 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
18282 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
18283 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
18284#define MMA_SATF_VARIANTS(geom, type) \
18285 MMA_VARIANTS(geom, type), \
18286 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
18287 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
18288 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
18289 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
18290// Sub-integer MMA only supports row.col layout.
18291#define MMA_VARIANTS_I4(geom, type) \
18292 0, \
18293 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
18294 0, \
18295 0, \
18296 0, \
18297 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
18298 0, \
18299 0
18300// b1 MMA does not support .satfinite.
18301#define MMA_VARIANTS_B1_XOR(geom, type) \
18302 0, \
18303 Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \
18304 0, \
18305 0, \
18306 0, \
18307 0, \
18308 0, \
18309 0
18310#define MMA_VARIANTS_B1_AND(geom, type) \
18311 0, \
18312 Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \
18313 0, \
18314 0, \
18315 0, \
18316 0, \
18317 0, \
18318 0
18319 // clang-format on
18320 switch (BuiltinID) {
18321 // FP MMA
18322 // Note that 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while
18323 // NumEltsN of return value are ordered as A,B,C,D.
18324 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
18325 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};
18326 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
18327 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};
18328 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
18329 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};
18330 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
18331 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};
18332 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
18333 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};
18334 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
18335 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};
18336 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
18337 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};
18338 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
18339 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};
18340 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
18341 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};
18342 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
18343 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};
18344 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
18345 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};
18346 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
18347 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};
18348
18349 // Integer MMA
18350 case NVPTX::BI__imma_m16n16k16_mma_s8:
18351 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};
18352 case NVPTX::BI__imma_m16n16k16_mma_u8:
18353 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};
18354 case NVPTX::BI__imma_m32n8k16_mma_s8:
18355 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};
18356 case NVPTX::BI__imma_m32n8k16_mma_u8:
18357 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};
18358 case NVPTX::BI__imma_m8n32k16_mma_s8:
18359 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};
18360 case NVPTX::BI__imma_m8n32k16_mma_u8:
18361 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}};
18362
18363 // Sub-integer MMA
18364 case NVPTX::BI__imma_m8n8k32_mma_s4:
18365 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};
18366 case NVPTX::BI__imma_m8n8k32_mma_u4:
18367 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};
18368 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
18369 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};
18370 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
18371 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};
18372
18373 // Double MMA
18374 case NVPTX::BI__dmma_m8n8k4_mma_f64:
18375 return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};
18376
18377 // Alternate FP MMA
18378 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
18379 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};
18380 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
18381 return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};
18382 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
18383 return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};
18384 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
18385 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};
18386 default:
18387 llvm_unreachable("Unexpected builtin ID.");
18388 }
18389#undef MMA_VARIANTS
18390#undef MMA_SATF_VARIANTS
18391#undef MMA_VARIANTS_I4
18392#undef MMA_VARIANTS_B1_AND
18393#undef MMA_VARIANTS_B1_XOR
18394}
18395
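// Emits an nvvm ldg/ldu load. The pointee's natural alignment is passed to
// the intrinsic as an explicit i32 argument.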
18396static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
18397 const CallExpr *E) {
18398 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
18399 QualType ArgType = E->getArg(0)->getType();
18400 clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
18401 llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
18402 return CGF.Builder.CreateCall(
18403 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
18404 {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
18405}
18406
18407static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
18408 const CallExpr *E) {
18409 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
18410 llvm::Type *ElemTy =
18411 CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
18412 return CGF.Builder.CreateCall(
18413 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
18414 {Ptr, CGF.EmitScalarExpr(E->getArg(1))});
18415}
18416
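// The cp.async builtins optionally take a third (src-size) argument; when it
// is present, the "_s" variant of the intrinsic is emitted instead.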
18417static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS,
18418 CodeGenFunction &CGF, const CallExpr *E,
18419 int SrcSize) {
18420 return E->getNumArgs() == 3
18421 ? CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicIDS),
18422 {CGF.EmitScalarExpr(E->getArg(0)),
18423 CGF.EmitScalarExpr(E->getArg(1)),
18424 CGF.EmitScalarExpr(E->getArg(2))})
18425 : CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicID),
18426 {CGF.EmitScalarExpr(E->getArg(0)),
18427 CGF.EmitScalarExpr(E->getArg(1))});
18428}
18429
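// Common helper for f16 builtins: checks for native half-type support,
// forwards the ldg/ldu intrinsics to MakeLdgLdu, and otherwise bitcasts the
// arguments to the intrinsic's parameter types before emitting the call.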
18430static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
18431 const CallExpr *E, CodeGenFunction &CGF) {
18432 auto &C = CGF.CGM.getContext();
18433 if (!(C.getLangOpts().NativeHalfType ||
18434 !C.getTargetInfo().useFP16ConversionIntrinsics())) {
18435 CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() +
18436 " requires native half type support.");
18437 return nullptr;
18438 }
18439
18440 if (IntrinsicID == Intrinsic::nvvm_ldg_global_f ||
18441 IntrinsicID == Intrinsic::nvvm_ldu_global_f)
18442 return MakeLdgLdu(IntrinsicID, CGF, E);
18443
18444 SmallVector<Value *, 16> Args;
18445 auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
18446 auto *FTy = F->getFunctionType();
18447 unsigned ICEArguments = 0;
18448 ASTContext::GetBuiltinTypeError Error;
18449 C.GetBuiltinType(BuiltinID, Error, &ICEArguments);
18450 assert(Error == ASTContext::GE_None && "Should not codegen an error");
18451 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
18452 assert((ICEArguments & (1 << i)) == 0);
18453 auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i));
18454 auto *PTy = FTy->getParamType(i);
18455 if (PTy != ArgValue->getType())
18456 ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy);
18457 Args.push_back(ArgValue);
18458 }
18459
18460 return CGF.Builder.CreateCall(F, Args);
18461}
18462} // namespace
18463
18464 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
18465 const CallExpr *E) {
18466 switch (BuiltinID) {
18467 case NVPTX::BI__nvvm_atom_add_gen_i:
18468 case NVPTX::BI__nvvm_atom_add_gen_l:
18469 case NVPTX::BI__nvvm_atom_add_gen_ll:
18470 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
18471
18472 case NVPTX::BI__nvvm_atom_sub_gen_i:
18473 case NVPTX::BI__nvvm_atom_sub_gen_l:
18474 case NVPTX::BI__nvvm_atom_sub_gen_ll:
18475 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
18476
18477 case NVPTX::BI__nvvm_atom_and_gen_i:
18478 case NVPTX::BI__nvvm_atom_and_gen_l:
18479 case NVPTX::BI__nvvm_atom_and_gen_ll:
18480 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
18481
18482 case NVPTX::BI__nvvm_atom_or_gen_i:
18483 case NVPTX::BI__nvvm_atom_or_gen_l:
18484 case NVPTX::BI__nvvm_atom_or_gen_ll:
18485 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
18486
18487 case NVPTX::BI__nvvm_atom_xor_gen_i:
18488 case NVPTX::BI__nvvm_atom_xor_gen_l:
18489 case NVPTX::BI__nvvm_atom_xor_gen_ll:
18490 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
18491
18492 case NVPTX::BI__nvvm_atom_xchg_gen_i:
18493 case NVPTX::BI__nvvm_atom_xchg_gen_l:
18494 case NVPTX::BI__nvvm_atom_xchg_gen_ll:
18495 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
18496
18497 case NVPTX::BI__nvvm_atom_max_gen_i:
18498 case NVPTX::BI__nvvm_atom_max_gen_l:
18499 case NVPTX::BI__nvvm_atom_max_gen_ll:
18500 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
18501
18502 case NVPTX::BI__nvvm_atom_max_gen_ui:
18503 case NVPTX::BI__nvvm_atom_max_gen_ul:
18504 case NVPTX::BI__nvvm_atom_max_gen_ull:
18505 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
18506
18507 case NVPTX::BI__nvvm_atom_min_gen_i:
18508 case NVPTX::BI__nvvm_atom_min_gen_l:
18509 case NVPTX::BI__nvvm_atom_min_gen_ll:
18510 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
18511
18512 case NVPTX::BI__nvvm_atom_min_gen_ui:
18513 case NVPTX::BI__nvvm_atom_min_gen_ul:
18514 case NVPTX::BI__nvvm_atom_min_gen_ull:
18515 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
18516
18517 case NVPTX::BI__nvvm_atom_cas_gen_i:
18518 case NVPTX::BI__nvvm_atom_cas_gen_l:
18519 case NVPTX::BI__nvvm_atom_cas_gen_ll:
18520 // __nvvm_atom_cas_gen_* should return the old value rather than the
18521 // success flag.
18522 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
18523
18524 case NVPTX::BI__nvvm_atom_add_gen_f:
18525 case NVPTX::BI__nvvm_atom_add_gen_d: {
18526 Value *Ptr = EmitScalarExpr(E->getArg(0));
18527 Value *Val = EmitScalarExpr(E->getArg(1));
18528 return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, Ptr, Val,
18529 AtomicOrdering::SequentiallyConsistent);
18530 }
18531
18532 case NVPTX::BI__nvvm_atom_inc_gen_ui: {
18533 Value *Ptr = EmitScalarExpr(E->getArg(0));
18534 Value *Val = EmitScalarExpr(E->getArg(1));
18535 Function *FnALI32 =
18536 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
18537 return Builder.CreateCall(FnALI32, {Ptr, Val});
18538 }
18539
18540 case NVPTX::BI__nvvm_atom_dec_gen_ui: {
18541 Value *Ptr = EmitScalarExpr(E->getArg(0));
18542 Value *Val = EmitScalarExpr(E->getArg(1));
18543 Function *FnALD32 =
18544 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
18545 return Builder.CreateCall(FnALD32, {Ptr, Val});
18546 }
18547
18548 case NVPTX::BI__nvvm_ldg_c:
18549 case NVPTX::BI__nvvm_ldg_sc:
18550 case NVPTX::BI__nvvm_ldg_c2:
18551 case NVPTX::BI__nvvm_ldg_sc2:
18552 case NVPTX::BI__nvvm_ldg_c4:
18553 case NVPTX::BI__nvvm_ldg_sc4:
18554 case NVPTX::BI__nvvm_ldg_s:
18555 case NVPTX::BI__nvvm_ldg_s2:
18556 case NVPTX::BI__nvvm_ldg_s4:
18557 case NVPTX::BI__nvvm_ldg_i:
18558 case NVPTX::BI__nvvm_ldg_i2:
18559 case NVPTX::BI__nvvm_ldg_i4:
18560 case NVPTX::BI__nvvm_ldg_l:
18561 case NVPTX::BI__nvvm_ldg_l2:
18562 case NVPTX::BI__nvvm_ldg_ll:
18563 case NVPTX::BI__nvvm_ldg_ll2:
18564 case NVPTX::BI__nvvm_ldg_uc:
18565 case NVPTX::BI__nvvm_ldg_uc2:
18566 case NVPTX::BI__nvvm_ldg_uc4:
18567 case NVPTX::BI__nvvm_ldg_us:
18568 case NVPTX::BI__nvvm_ldg_us2:
18569 case NVPTX::BI__nvvm_ldg_us4:
18570 case NVPTX::BI__nvvm_ldg_ui:
18571 case NVPTX::BI__nvvm_ldg_ui2:
18572 case NVPTX::BI__nvvm_ldg_ui4:
18573 case NVPTX::BI__nvvm_ldg_ul:
18574 case NVPTX::BI__nvvm_ldg_ul2:
18575 case NVPTX::BI__nvvm_ldg_ull:
18576 case NVPTX::BI__nvvm_ldg_ull2:
18577 // PTX Interoperability section 2.2: "For a vector with an even number of
18578 // elements, its alignment is set to number of elements times the alignment
18579 // of its member: n*alignof(t)."
18580 return MakeLdgLdu(Intrinsic::nvvm_ldg_global_i, *this, E);
18581 case NVPTX::BI__nvvm_ldg_f:
18582 case NVPTX::BI__nvvm_ldg_f2:
18583 case NVPTX::BI__nvvm_ldg_f4:
18584 case NVPTX::BI__nvvm_ldg_d:
18585 case NVPTX::BI__nvvm_ldg_d2:
18586 return MakeLdgLdu(Intrinsic::nvvm_ldg_global_f, *this, E);
18587
18588 case NVPTX::BI__nvvm_ldu_c:
18589 case NVPTX::BI__nvvm_ldu_sc:
18590 case NVPTX::BI__nvvm_ldu_c2:
18591 case NVPTX::BI__nvvm_ldu_sc2:
18592 case NVPTX::BI__nvvm_ldu_c4:
18593 case NVPTX::BI__nvvm_ldu_sc4:
18594 case NVPTX::BI__nvvm_ldu_s:
18595 case NVPTX::BI__nvvm_ldu_s2:
18596 case NVPTX::BI__nvvm_ldu_s4:
18597 case NVPTX::BI__nvvm_ldu_i:
18598 case NVPTX::BI__nvvm_ldu_i2:
18599 case NVPTX::BI__nvvm_ldu_i4:
18600 case NVPTX::BI__nvvm_ldu_l:
18601 case NVPTX::BI__nvvm_ldu_l2:
18602 case NVPTX::BI__nvvm_ldu_ll:
18603 case NVPTX::BI__nvvm_ldu_ll2:
18604 case NVPTX::BI__nvvm_ldu_uc:
18605 case NVPTX::BI__nvvm_ldu_uc2:
18606 case NVPTX::BI__nvvm_ldu_uc4:
18607 case NVPTX::BI__nvvm_ldu_us:
18608 case NVPTX::BI__nvvm_ldu_us2:
18609 case NVPTX::BI__nvvm_ldu_us4:
18610 case NVPTX::BI__nvvm_ldu_ui:
18611 case NVPTX::BI__nvvm_ldu_ui2:
18612 case NVPTX::BI__nvvm_ldu_ui4:
18613 case NVPTX::BI__nvvm_ldu_ul:
18614 case NVPTX::BI__nvvm_ldu_ul2:
18615 case NVPTX::BI__nvvm_ldu_ull:
18616 case NVPTX::BI__nvvm_ldu_ull2:
18617 return MakeLdgLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
18618 case NVPTX::BI__nvvm_ldu_f:
18619 case NVPTX::BI__nvvm_ldu_f2:
18620 case NVPTX::BI__nvvm_ldu_f4:
18621 case NVPTX::BI__nvvm_ldu_d:
18622 case NVPTX::BI__nvvm_ldu_d2:
18623 return MakeLdgLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
18624
18625 case NVPTX::BI__nvvm_atom_cta_add_gen_i:
18626 case NVPTX::BI__nvvm_atom_cta_add_gen_l:
18627 case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
18628 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E);
18629 case NVPTX::BI__nvvm_atom_sys_add_gen_i:
18630 case NVPTX::BI__nvvm_atom_sys_add_gen_l:
18631 case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
18632 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E);
18633 case NVPTX::BI__nvvm_atom_cta_add_gen_f:
18634 case NVPTX::BI__nvvm_atom_cta_add_gen_d:
18635 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E);
18636 case NVPTX::BI__nvvm_atom_sys_add_gen_f:
18637 case NVPTX::BI__nvvm_atom_sys_add_gen_d:
18638 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E);
18639 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
18640 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
18641 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
18642 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E);
18643 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
18644 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
18645 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
18646 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E);
18647 case NVPTX::BI__nvvm_atom_cta_max_gen_i:
18648 case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
18649 case NVPTX::BI__nvvm_atom_cta_max_gen_l:
18650 case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
18651 case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
18652 case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
18653 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E);
18654 case NVPTX::BI__nvvm_atom_sys_max_gen_i:
18655 case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
18656 case NVPTX::BI__nvvm_atom_sys_max_gen_l:
18657 case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
18658 case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
18659 case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
18660 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E);
18661 case NVPTX::BI__nvvm_atom_cta_min_gen_i:
18662 case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
18663 case NVPTX::BI__nvvm_atom_cta_min_gen_l:
18664 case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
18665 case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
18666 case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
18667 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E);
18668 case NVPTX::BI__nvvm_atom_sys_min_gen_i:
18669 case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
18670 case NVPTX::BI__nvvm_atom_sys_min_gen_l:
18671 case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
18672 case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
18673 case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
18674 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E);
18675 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
18676 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E);
18677 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
18678 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E);
18679 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
18680 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E);
18681 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
18682 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E);
18683 case NVPTX::BI__nvvm_atom_cta_and_gen_i:
18684 case NVPTX::BI__nvvm_atom_cta_and_gen_l:
18685 case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
18686 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E);
18687 case NVPTX::BI__nvvm_atom_sys_and_gen_i:
18688 case NVPTX::BI__nvvm_atom_sys_and_gen_l:
18689 case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
18690 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E);
18691 case NVPTX::BI__nvvm_atom_cta_or_gen_i:
18692 case NVPTX::BI__nvvm_atom_cta_or_gen_l:
18693 case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
18694 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E);
18695 case NVPTX::BI__nvvm_atom_sys_or_gen_i:
18696 case NVPTX::BI__nvvm_atom_sys_or_gen_l:
18697 case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
18698 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E);
18699 case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
18700 case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
18701 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
18702 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E);
18703 case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
18704 case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
18705 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
18706 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E);
18707 case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
18708 case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
18709 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
18710 Value *Ptr = EmitScalarExpr(E->getArg(0));
18711 llvm::Type *ElemTy =
18712 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
18713 return Builder.CreateCall(
18714 CGM.getIntrinsic(
18715 Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
18716 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
18717 }
18718 case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
18719 case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
18720 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
18721 Value *Ptr = EmitScalarExpr(E->getArg(0));
18722 llvm::Type *ElemTy =
18723 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
18724 return Builder.CreateCall(
18725 CGM.getIntrinsic(
18726 Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
18727 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
18728 }
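// match.all.sync returns a {value, predicate} pair; the predicate is stored
// through the pointer passed as the third argument and the value is
// returned.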
18729 case NVPTX::BI__nvvm_match_all_sync_i32p:
18730 case NVPTX::BI__nvvm_match_all_sync_i64p: {
18731 Value *Mask = EmitScalarExpr(E->getArg(0));
18732 Value *Val = EmitScalarExpr(E->getArg(1));
18733 Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
18734 Value *ResultPair = Builder.CreateCall(
18735 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
18736 ? Intrinsic::nvvm_match_all_sync_i32p
18737 : Intrinsic::nvvm_match_all_sync_i64p),
18738 {Mask, Val});
18739 Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
18740 PredOutPtr.getElementType());
18741 Builder.CreateStore(Pred, PredOutPtr);
18742 return Builder.CreateExtractValue(ResultPair, 0);
18743 }
18744
18745 // FP MMA loads
18746 case NVPTX::BI__hmma_m16n16k16_ld_a:
18747 case NVPTX::BI__hmma_m16n16k16_ld_b:
18748 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
18749 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
18750 case NVPTX::BI__hmma_m32n8k16_ld_a:
18751 case NVPTX::BI__hmma_m32n8k16_ld_b:
18752 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
18753 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
18754 case NVPTX::BI__hmma_m8n32k16_ld_a:
18755 case NVPTX::BI__hmma_m8n32k16_ld_b:
18756 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
18757 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
18758 // Integer MMA loads.
18759 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
18760 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
18761 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
18762 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
18763 case NVPTX::BI__imma_m16n16k16_ld_c:
18764 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
18765 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
18766 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
18767 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
18768 case NVPTX::BI__imma_m32n8k16_ld_c:
18769 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
18770 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
18771 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
18772 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
18773 case NVPTX::BI__imma_m8n32k16_ld_c:
18774 // Sub-integer MMA loads.
18775 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
18776 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
18777 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
18778 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
18779 case NVPTX::BI__imma_m8n8k32_ld_c:
18780 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
18781 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
18782 case NVPTX::BI__bmma_m8n8k128_ld_c:
18783 // Double MMA loads.
18784 case NVPTX::BI__dmma_m8n8k4_ld_a:
18785 case NVPTX::BI__dmma_m8n8k4_ld_b:
18786 case NVPTX::BI__dmma_m8n8k4_ld_c:
18787 // Alternate float MMA loads.
18788 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
18789 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
18790 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
18791 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
18792 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
18793 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
18794 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
18795 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
18796 case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
18797 Address Dst = EmitPointerWithAlignment(E->getArg(0));
18798 Value *Src = EmitScalarExpr(E->getArg(1));
18799 Value *Ldm = EmitScalarExpr(E->getArg(2));
18800 std::optional<llvm::APSInt> isColMajorArg =
18801 E->getArg(3)->getIntegerConstantExpr(getContext());
18802 if (!isColMajorArg)
18803 return nullptr;
18804 bool isColMajor = isColMajorArg->getSExtValue();
18805 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
18806 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
18807 if (IID == 0)
18808 return nullptr;
18809
18810 Value *Result =
18811 Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
18812
18813 // Save returned values.
18814 assert(II.NumResults);
18815 if (II.NumResults == 1) {
18816 Builder.CreateAlignedStore(Result, Dst.getPointer(),
18817 CharUnits::fromQuantity(4));
18818 } else {
18819 for (unsigned i = 0; i < II.NumResults; ++i) {
18820 Builder.CreateAlignedStore(
18821 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
18822 Dst.getElementType()),
18823 Builder.CreateGEP(Dst.getElementType(), Dst.getPointer(),
18824 llvm::ConstantInt::get(IntTy, i)),
18825 CharUnits::fromQuantity(4));
18826 }
18827 }
18828 return Result;
18829 }
18830
18831 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
18832 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
18833 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
18834 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
18835 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
18836 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
18837 case NVPTX::BI__imma_m16n16k16_st_c_i32:
18838 case NVPTX::BI__imma_m32n8k16_st_c_i32:
18839 case NVPTX::BI__imma_m8n32k16_st_c_i32:
18840 case NVPTX::BI__imma_m8n8k32_st_c_i32:
18841 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
18842 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
18843 case NVPTX::BI__mma_m16n16k8_st_c_f32: {
18844 Value *Dst = EmitScalarExpr(E->getArg(0));
18845 Address Src = EmitPointerWithAlignment(E->getArg(1));
18846 Value *Ldm = EmitScalarExpr(E->getArg(2));
18847 std::optional<llvm::APSInt> isColMajorArg =
18848 E->getArg(3)->getIntegerConstantExpr(getContext());
18849 if (!isColMajorArg)
18850 return nullptr;
18851 bool isColMajor = isColMajorArg->getSExtValue();
18852 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
18853 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
18854 if (IID == 0)
18855 return nullptr;
18856 Function *Intrinsic =
18857 CGM.getIntrinsic(IID, Dst->getType());
18858 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
18859 SmallVector<Value *, 10> Values = {Dst};
18860 for (unsigned i = 0; i < II.NumResults; ++i) {
18861 Value *V = Builder.CreateAlignedLoad(
18862 Src.getElementType(),
18863 Builder.CreateGEP(Src.getElementType(), Src.getPointer(),
18864 llvm::ConstantInt::get(IntTy, i)),
18865 CharUnits::fromQuantity(4));
18866 Values.push_back(Builder.CreateBitCast(V, ParamType));
18867 }
18868 Values.push_back(Ldm);
18869 Value *Result = Builder.CreateCall(Intrinsic, Values);
18870 return Result;
18871 }
18872
18873 // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
18874 // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
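// The A, B, and C fragments are loaded element-by-element from the source
// pointers, the MMA intrinsic is called, and the elements of the resulting
// D fragment are stored back through Dst.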
18875 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
18876 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
18877 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
18878 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
18879 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
18880 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
18881 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
18882 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
18883 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
18884 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
18885 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
18886 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
18887 case NVPTX::BI__imma_m16n16k16_mma_s8:
18888 case NVPTX::BI__imma_m16n16k16_mma_u8:
18889 case NVPTX::BI__imma_m32n8k16_mma_s8:
18890 case NVPTX::BI__imma_m32n8k16_mma_u8:
18891 case NVPTX::BI__imma_m8n32k16_mma_s8:
18892 case NVPTX::BI__imma_m8n32k16_mma_u8:
18893 case NVPTX::BI__imma_m8n8k32_mma_s4:
18894 case NVPTX::BI__imma_m8n8k32_mma_u4:
18895 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
18896 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
18897 case NVPTX::BI__dmma_m8n8k4_mma_f64:
18898 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
18899 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
18900 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
18901 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
18902 Address Dst = EmitPointerWithAlignment(E->getArg(0));
18903 Address SrcA = EmitPointerWithAlignment(E->getArg(1));
18904 Address SrcB = EmitPointerWithAlignment(E->getArg(2));
18905 Address SrcC = EmitPointerWithAlignment(E->getArg(3));
18906 std::optional<llvm::APSInt> LayoutArg =
18907 E->getArg(4)->getIntegerConstantExpr(getContext());
18908 if (!LayoutArg)
18909 return nullptr;
18910 int Layout = LayoutArg->getSExtValue();
18911 if (Layout < 0 || Layout > 3)
18912 return nullptr;
18913 llvm::APSInt SatfArg;
18914 if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
18915 BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
18916 SatfArg = 0; // .b1 does not have satf argument.
18917 else if (std::optional<llvm::APSInt> OptSatfArg =
18918 E->getArg(5)->getIntegerConstantExpr(getContext()))
18919 SatfArg = *OptSatfArg;
18920 else
18921 return nullptr;
18922 bool Satf = SatfArg.getSExtValue();
18923 NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
18924 unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
18925 if (IID == 0) // Unsupported combination of Layout/Satf.
18926 return nullptr;
18927
18928 SmallVector<Value *, 24> Values;
18929 Function *Intrinsic = CGM.getIntrinsic(IID);
18930 llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
18931 // Load A
18932 for (unsigned i = 0; i < MI.NumEltsA; ++i) {
18933 Value *V = Builder.CreateAlignedLoad(
18934 SrcA.getElementType(),
18935 Builder.CreateGEP(SrcA.getElementType(), SrcA.getPointer(),
18936 llvm::ConstantInt::get(IntTy, i)),
18937 CharUnits::fromQuantity(4));
18938 Values.push_back(Builder.CreateBitCast(V, AType));
18939 }
18940 // Load B
18941 llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
18942 for (unsigned i = 0; i < MI.NumEltsB; ++i) {
18943 Value *V = Builder.CreateAlignedLoad(
18944 SrcB.getElementType(),
18945 Builder.CreateGEP(SrcB.getElementType(), SrcB.getPointer(),
18946 llvm::ConstantInt::get(IntTy, i)),
18947 CharUnits::fromQuantity(4));
18948 Values.push_back(Builder.CreateBitCast(V, BType));
18949 }
18950 // Load C
18951 llvm::Type *CType =
18952 Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
18953 for (unsigned i = 0; i < MI.NumEltsC; ++i) {
18954 Value *V = Builder.CreateAlignedLoad(
18955 SrcC.getElementType(),
18956 Builder.CreateGEP(SrcC.getElementType(), SrcC.getPointer(),
18957 llvm::ConstantInt::get(IntTy, i)),
18958 CharUnits::fromQuantity(4));
18959 Values.push_back(Builder.CreateBitCast(V, CType));
18960 }
18961 Value *Result = Builder.CreateCall(Intrinsic, Values);
18962 llvm::Type *DType = Dst.getElementType();
18963 for (unsigned i = 0; i < MI.NumEltsD; ++i)
18964 Builder.CreateAlignedStore(
18965 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
18966 Builder.CreateGEP(Dst.getElementType(), Dst.getPointer(),
18967 llvm::ConstantInt::get(IntTy, i)),
18968 CharUnits::fromQuantity(4));
18969 return Result;
18970 }
18971 // The following builtins require half type support
18972 case NVPTX::BI__nvvm_ex2_approx_f16:
18973 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this);
18974 case NVPTX::BI__nvvm_ex2_approx_f16x2:
18975 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this);
18976 case NVPTX::BI__nvvm_ff2f16x2_rn:
18977 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this);
18978 case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
18979 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this);
18980 case NVPTX::BI__nvvm_ff2f16x2_rz:
18981 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this);
18982 case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
18983 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this);
18984 case NVPTX::BI__nvvm_fma_rn_f16:
18985 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this);
18986 case NVPTX::BI__nvvm_fma_rn_f16x2:
18987 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this);
18988 case NVPTX::BI__nvvm_fma_rn_ftz_f16:
18989 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this);
18990 case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
18991 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this);
18992 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
18993 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
18994 *this);
18995 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
18996 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
18997 *this);
18998 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
18999 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,
19000 *this);
19001 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
19002 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,
19003 *this);
19004 case NVPTX::BI__nvvm_fma_rn_relu_f16:
19005 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this);
19006 case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
19007 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this);
19008 case NVPTX::BI__nvvm_fma_rn_sat_f16:
19009 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this);
19010 case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
19011 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this);
19012 case NVPTX::BI__nvvm_fmax_f16:
19013 return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this);
19014 case NVPTX::BI__nvvm_fmax_f16x2:
19015 return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this);
19016 case NVPTX::BI__nvvm_fmax_ftz_f16:
19017 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this);
19018 case NVPTX::BI__nvvm_fmax_ftz_f16x2:
19019 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this);
19020 case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
19021 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this);
19022 case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
19023 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
19024 *this);
19025 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
19026 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
19027 E, *this);
19028 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
19029 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
19030 BuiltinID, E, *this);
19031 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
19032 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
19033 *this);
19034 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
19035 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
19036 E, *this);
19037 case NVPTX::BI__nvvm_fmax_nan_f16:
19038 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this);
19039 case NVPTX::BI__nvvm_fmax_nan_f16x2:
19040 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this);
19041 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
19042 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
19043 *this);
19044 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
19045 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
19046 E, *this);
19047 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
19048 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
19049 *this);
19050 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
19051 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
19052 *this);
19053 case NVPTX::BI__nvvm_fmin_f16:
19054 return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this);
19055 case NVPTX::BI__nvvm_fmin_f16x2:
19056 return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this);
19057 case NVPTX::BI__nvvm_fmin_ftz_f16:
19058 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this);
19059 case NVPTX::BI__nvvm_fmin_ftz_f16x2:
19060 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this);
19061 case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
19062 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this);
19063 case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
19064 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
19065 *this);
19066 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
19067 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
19068 E, *this);
19069 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
19070 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
19071 BuiltinID, E, *this);
19072 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
19073 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
19074 *this);
19075 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
19076 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
19077 E, *this);
19078 case NVPTX::BI__nvvm_fmin_nan_f16:
19079 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this);
19080 case NVPTX::BI__nvvm_fmin_nan_f16x2:
19081 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this);
19082 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
19083 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
19084 *this);
19085 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
19086 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
19087 E, *this);
19088 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
19089 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
19090 *this);
19091 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
19092 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
19093 *this);
19094 case NVPTX::BI__nvvm_ldg_h:
19095 return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
19096 case NVPTX::BI__nvvm_ldg_h2:
19097 return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
19098 case NVPTX::BI__nvvm_ldu_h:
19099 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
19100 case NVPTX::BI__nvvm_ldu_h2: {
19101 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
19102 }
19103 case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
19104 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
19105 Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,
19106 4);
19107 case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
19108 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
19109 Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E,
19110 8);
19111 case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
19112 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
19113 Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E,
19114 16);
19115 case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
19116 return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
19117 Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,
19118 16);
19119 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
19120 return Builder.CreateCall(
19121 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
19122 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
19123 return Builder.CreateCall(
19124 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
19125 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
19126 return Builder.CreateCall(
19127 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
19128 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
19129 return Builder.CreateCall(
19130 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
19131 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
19132 return Builder.CreateCall(
19133 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
19134 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
19135 return Builder.CreateCall(
19136 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
19137 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
19138 return Builder.CreateCall(
19139 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
19140 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
19141 return Builder.CreateCall(
19142 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
19143 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
19144 return Builder.CreateCall(
19145 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
19146 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
19147 return Builder.CreateCall(
19148 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
19149 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
19150 return Builder.CreateCall(
19151 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
19152 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
19153 return Builder.CreateCall(
19154 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
19155 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
19156 return Builder.CreateCall(
19157 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
19158 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
19159 return Builder.CreateCall(
19160 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
19161 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
19162 return Builder.CreateCall(
19163 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
19164 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
19165 return Builder.CreateCall(
19166 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
19167 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
19168 return Builder.CreateCall(
19169 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
19170 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
19171 return Builder.CreateCall(
19172 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
19173 case NVPTX::BI__nvvm_is_explicit_cluster:
19174 return Builder.CreateCall(
19175 CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
19176 case NVPTX::BI__nvvm_isspacep_shared_cluster:
19177 return Builder.CreateCall(
19178 CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
19179 EmitScalarExpr(E->getArg(0)));
19180 case NVPTX::BI__nvvm_mapa:
19181 return Builder.CreateCall(
19182 CGM.getIntrinsic(Intrinsic::nvvm_mapa),
19183 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
19184 case NVPTX::BI__nvvm_mapa_shared_cluster:
19185 return Builder.CreateCall(
19186 CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
19187 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
19188 case NVPTX::BI__nvvm_getctarank:
19189 return Builder.CreateCall(
19190 CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
19191 EmitScalarExpr(E->getArg(0)));
19192 case NVPTX::BI__nvvm_getctarank_shared_cluster:
19193 return Builder.CreateCall(
19194 CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
19195 EmitScalarExpr(E->getArg(0)));
19196 case NVPTX::BI__nvvm_barrier_cluster_arrive:
19197 return Builder.CreateCall(
19198 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));
19199 case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
19200 return Builder.CreateCall(
19201 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
19202 case NVPTX::BI__nvvm_barrier_cluster_wait:
19203 return Builder.CreateCall(
19204 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
19205 case NVPTX::BI__nvvm_fence_sc_cluster:
19206 return Builder.CreateCall(
19207 CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
19208 default:
19209 return nullptr;
19210 }
19211}
19212
19213namespace {
19214struct BuiltinAlignArgs {
19215 llvm::Value *Src = nullptr;
19216 llvm::Type *SrcType = nullptr;
19217 llvm::Value *Alignment = nullptr;
19218 llvm::Value *Mask = nullptr;
19219 llvm::IntegerType *IntType = nullptr;
19220
19221 BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
19222 QualType AstType = E->getArg(0)->getType();
19223 if (AstType->isArrayType())
19224 Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).getPointer();
19225 else
19226 Src = CGF.EmitScalarExpr(E->getArg(0));
19227 SrcType = Src->getType();
19228 if (SrcType->isPointerTy()) {
19229 IntType = IntegerType::get(
19230 CGF.getLLVMContext(),
19231 CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
19232 } else {
19233 assert(SrcType->isIntegerTy());
19234 IntType = cast<llvm::IntegerType>(SrcType);
19235 }
19236 Alignment = CGF.EmitScalarExpr(E->getArg(1));
19237 Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
19238 auto *One = llvm::ConstantInt::get(IntType, 1);
19239 Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
19240 }
19241};
19242} // namespace
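// Illustrative note (editorial, not in the upstream source): for a call
// such as __builtin_align_up(p, 16), BuiltinAlignArgs computes
// Alignment = 16 zero-extended/truncated to the pointer's index width,
// and Mask = Alignment - 1 = 0xF. The helpers below rely on the
// alignment being a power of two, so this mask covers exactly the low
// bits that alignment controls.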
19243
19244/// Generate (x & (y-1)) == 0.
19246 BuiltinAlignArgs Args(E, *this);
19247 llvm::Value *SrcAddress = Args.Src;
19248 if (Args.SrcType->isPointerTy())
19249 SrcAddress =
19250 Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
19251 return RValue::get(Builder.CreateICmpEQ(
19252 Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
19253 llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
19254}
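// Illustrative sketch (not from the original source) of the IR emitted
// above: __builtin_is_aligned(p, 64) on a pointer p lowers to roughly
// (assuming a 64-bit index width)
//   %src_addr   = ptrtoint ptr %p to i64
//   %set_bits   = and i64 %src_addr, 63
//   %is_aligned = icmp eq i64 %set_bits, 0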
19255
19256/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
19257/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
19258/// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
19259/// TODO: actually use ptrmask once most optimization passes know about it.
19260RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
19261 BuiltinAlignArgs Args(E, *this);
19262 llvm::Value *SrcAddr = Args.Src;
19263 if (Args.Src->getType()->isPointerTy())
19264 SrcAddr = Builder.CreatePtrToInt(Args.Src, Args.IntType, "intptr");
19265 llvm::Value *SrcForMask = SrcAddr;
19266 if (AlignUp) {
19267 // When aligning up we have to first add the mask to ensure we go over the
19268 // next alignment value and then align down to the next valid multiple.
19269 // By adding the mask, we ensure that align_up on an already aligned
19270 // value will not change the value.
19271 SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
19272 }
19273 // Invert the mask to only clear the lower bits.
19274 llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
19275 llvm::Value *Result =
19276 Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
19277 if (Args.Src->getType()->isPointerTy()) {
19278 /// TODO: Use ptrmask instead of ptrtoint+gep once it is optimized well.
19279 // Result = Builder.CreateIntrinsic(
19280 // Intrinsic::ptrmask, {Args.SrcType, SrcForMask->getType(), Args.IntType},
19281 // {SrcForMask, NegatedMask}, nullptr, "aligned_result");
19282 Result->setName("aligned_intptr");
19283 llvm::Value *Difference = Builder.CreateSub(Result, SrcAddr, "diff");
19284 // The result must point to the same underlying allocation. This means we
19285 // can use an inbounds GEP to enable better optimization.
19286 if (getLangOpts().isSignedOverflowDefined())
19287 Result =
19288 Builder.CreateGEP(Int8Ty, Args.Src, Difference, "aligned_result");
19289 else
19290 Result = EmitCheckedInBoundsGEP(Int8Ty, Args.Src, Difference,
19291 /*SignedIndices=*/true,
19292 /*isSubtraction=*/!AlignUp,
19293 E->getExprLoc(), "aligned_result");
19294 // Emit an alignment assumption to ensure that the new alignment is
19295 // propagated to loads/stores, etc.
19296 emitAlignmentAssumption(Result, E, E->getExprLoc(), Args.Alignment);
19297 }
19298 assert(Result->getType() == Args.SrcType);
19299 return RValue::get(Result);
19300}
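// Illustrative worked example (editorial): aligning the address 13 up
// to 8 computes over_boundary = 13 + 7 = 20 and then
// aligned_result = 20 & ~7 = 16, while aligning down omits the add:
// 13 & ~7 = 8. For pointers, the byte difference (here 16 - 13 = 3) is
// applied as an i8 GEP so the result provably stays within the original
// allocation.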
19301
19302Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
19303 const CallExpr *E) {
19304 switch (BuiltinID) {
19305 case WebAssembly::BI__builtin_wasm_memory_size: {
19306 llvm::Type *ResultType = ConvertType(E->getType());
19307 Value *I = EmitScalarExpr(E->getArg(0));
19308 Function *Callee =
19309 CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
19310 return Builder.CreateCall(Callee, I);
19311 }
19312 case WebAssembly::BI__builtin_wasm_memory_grow: {
19313 llvm::Type *ResultType = ConvertType(E->getType());
19314 Value *Args[] = {EmitScalarExpr(E->getArg(0)),
19315 EmitScalarExpr(E->getArg(1))};
19316 Function *Callee =
19317 CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
19318 return Builder.CreateCall(Callee, Args);
19319 }
19320 case WebAssembly::BI__builtin_wasm_tls_size: {
19321 llvm::Type *ResultType = ConvertType(E->getType());
19322 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
19323 return Builder.CreateCall(Callee);
19324 }
19325 case WebAssembly::BI__builtin_wasm_tls_align: {
19326 llvm::Type *ResultType = ConvertType(E->getType());
19327 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
19328 return Builder.CreateCall(Callee);
19329 }
19330 case WebAssembly::BI__builtin_wasm_tls_base: {
19331 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
19332 return Builder.CreateCall(Callee);
19333 }
19334 case WebAssembly::BI__builtin_wasm_throw: {
19335 Value *Tag = EmitScalarExpr(E->getArg(0));
19336 Value *Obj = EmitScalarExpr(E->getArg(1));
19337 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
19338 return Builder.CreateCall(Callee, {Tag, Obj});
19339 }
19340 case WebAssembly::BI__builtin_wasm_rethrow: {
19341 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
19342 return Builder.CreateCall(Callee);
19343 }
19344 case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
19345 Value *Addr = EmitScalarExpr(E->getArg(0));
19346 Value *Expected = EmitScalarExpr(E->getArg(1));
19347 Value *Timeout = EmitScalarExpr(E->getArg(2));
19348 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
19349 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
19350 }
19351 case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
19352 Value *Addr = EmitScalarExpr(E->getArg(0));
19353 Value *Expected = EmitScalarExpr(E->getArg(1));
19354 Value *Timeout = EmitScalarExpr(E->getArg(2));
19355 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
19356 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
19357 }
19358 case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
19359 Value *Addr = EmitScalarExpr(E->getArg(0));
19360 Value *Count = EmitScalarExpr(E->getArg(1));
19361 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
19362 return Builder.CreateCall(Callee, {Addr, Count});
19363 }
19364 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
19365 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
19366 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
19367 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
19368 Value *Src = EmitScalarExpr(E->getArg(0));
19369 llvm::Type *ResT = ConvertType(E->getType());
19370 Function *Callee =
19371 CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
19372 return Builder.CreateCall(Callee, {Src});
19373 }
19374 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
19375 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
19376 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
19377 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
19378 Value *Src = EmitScalarExpr(E->getArg(0));
19379 llvm::Type *ResT = ConvertType(E->getType());
19380 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
19381 {ResT, Src->getType()});
19382 return Builder.CreateCall(Callee, {Src});
19383 }
19384 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
19385 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
19386 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
19387 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
19388 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
19389 Value *Src = EmitScalarExpr(E->getArg(0));
19390 llvm::Type *ResT = ConvertType(E->getType());
19391 Function *Callee =
19392 CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
19393 return Builder.CreateCall(Callee, {Src});
19394 }
19395 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
19396 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
19397 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
19398 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
19399 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
19400 Value *Src = EmitScalarExpr(E->getArg(0));
19401 llvm::Type *ResT = ConvertType(E->getType());
19402 Function *Callee =
19403 CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
19404 return Builder.CreateCall(Callee, {Src});
19405 }
19406 case WebAssembly::BI__builtin_wasm_min_f32:
19407 case WebAssembly::BI__builtin_wasm_min_f64:
19408 case WebAssembly::BI__builtin_wasm_min_f32x4:
19409 case WebAssembly::BI__builtin_wasm_min_f64x2: {
19410 Value *LHS = EmitScalarExpr(E->getArg(0));
19411 Value *RHS = EmitScalarExpr(E->getArg(1));
19412 Function *Callee =
19413 CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
19414 return Builder.CreateCall(Callee, {LHS, RHS});
19415 }
19416 case WebAssembly::BI__builtin_wasm_max_f32:
19417 case WebAssembly::BI__builtin_wasm_max_f64:
19418 case WebAssembly::BI__builtin_wasm_max_f32x4:
19419 case WebAssembly::BI__builtin_wasm_max_f64x2: {
19420 Value *LHS = EmitScalarExpr(E->getArg(0));
19421 Value *RHS = EmitScalarExpr(E->getArg(1));
19422 Function *Callee =
19423 CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
19424 return Builder.CreateCall(Callee, {LHS, RHS});
19425 }
19426 case WebAssembly::BI__builtin_wasm_pmin_f32x4:
19427 case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
19428 Value *LHS = EmitScalarExpr(E->getArg(0));
19429 Value *RHS = EmitScalarExpr(E->getArg(1));
19430 Function *Callee =
19431 CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
19432 return Builder.CreateCall(Callee, {LHS, RHS});
19433 }
19434 case WebAssembly::BI__builtin_wasm_pmax_f32x4:
19435 case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
19436 Value *LHS = EmitScalarExpr(E->getArg(0));
19437 Value *RHS = EmitScalarExpr(E->getArg(1));
19438 Function *Callee =
19439 CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
19440 return Builder.CreateCall(Callee, {LHS, RHS});
19441 }
19442 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
19443 case WebAssembly::BI__builtin_wasm_floor_f32x4:
19444 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
19445 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
19446 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
19447 case WebAssembly::BI__builtin_wasm_floor_f64x2:
19448 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
19449 case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
19450 unsigned IntNo;
19451 switch (BuiltinID) {
19452 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
19453 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
19454 IntNo = Intrinsic::ceil;
19455 break;
19456 case WebAssembly::BI__builtin_wasm_floor_f32x4:
19457 case WebAssembly::BI__builtin_wasm_floor_f64x2:
19458 IntNo = Intrinsic::floor;
19459 break;
19460 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
19461 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
19462 IntNo = Intrinsic::trunc;
19463 break;
19464 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
19465 case WebAssembly::BI__builtin_wasm_nearest_f64x2:
19466 IntNo = Intrinsic::nearbyint;
19467 break;
19468 default:
19469 llvm_unreachable("unexpected builtin ID");
19470 }
19471 Value *Value = EmitScalarExpr(E->getArg(0));
19472 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
19473 return Builder.CreateCall(Callee, Value);
19474 }
19475 case WebAssembly::BI__builtin_wasm_ref_null_extern: {
19476 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_extern);
19477 return Builder.CreateCall(Callee);
19478 }
19479 case WebAssembly::BI__builtin_wasm_ref_null_func: {
19480 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_func);
19481 return Builder.CreateCall(Callee);
19482 }
19483 case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {
19484 Value *Src = EmitScalarExpr(E->getArg(0));
19485 Value *Indices = EmitScalarExpr(E->getArg(1));
19486 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
19487 return Builder.CreateCall(Callee, {Src, Indices});
19488 }
19489 case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
19490 case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
19491 case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
19492 case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
19493 case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
19494 case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
19495 case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
19496 case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8: {
19497 unsigned IntNo;
19498 switch (BuiltinID) {
19499 case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
19500 case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
19501 IntNo = Intrinsic::sadd_sat;
19502 break;
19503 case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
19504 case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
19505 IntNo = Intrinsic::uadd_sat;
19506 break;
19507 case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
19508 case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
19509 IntNo = Intrinsic::wasm_sub_sat_signed;
19510 break;
19511 case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
19512 case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8:
19513 IntNo = Intrinsic::wasm_sub_sat_unsigned;
19514 break;
19515 default:
19516 llvm_unreachable("unexpected builtin ID");
19517 }
19518 Value *LHS = EmitScalarExpr(E->getArg(0));
19519 Value *RHS = EmitScalarExpr(E->getArg(1));
19520 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
19521 return Builder.CreateCall(Callee, {LHS, RHS});
19522 }
19523 case WebAssembly::BI__builtin_wasm_abs_i8x16:
19524 case WebAssembly::BI__builtin_wasm_abs_i16x8:
19525 case WebAssembly::BI__builtin_wasm_abs_i32x4:
19526 case WebAssembly::BI__builtin_wasm_abs_i64x2: {
19527 Value *Vec = EmitScalarExpr(E->getArg(0));
19528 Value *Neg = Builder.CreateNeg(Vec, "neg");
19529 Constant *Zero = llvm::Constant::getNullValue(Vec->getType());
19530 Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");
19531 return Builder.CreateSelect(ICmp, Neg, Vec, "abs");
19532 }
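// Illustrative note (editorial): the neg/icmp/select sequence above
// implements a lane-wise abs(x) = x < 0 ? -x : x, e.g. lanes <-5, 7>
// become <5, 7>, without needing a target-specific intrinsic.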
19533 case WebAssembly::BI__builtin_wasm_min_s_i8x16:
19534 case WebAssembly::BI__builtin_wasm_min_u_i8x16:
19535 case WebAssembly::BI__builtin_wasm_max_s_i8x16:
19536 case WebAssembly::BI__builtin_wasm_max_u_i8x16:
19537 case WebAssembly::BI__builtin_wasm_min_s_i16x8:
19538 case WebAssembly::BI__builtin_wasm_min_u_i16x8:
19539 case WebAssembly::BI__builtin_wasm_max_s_i16x8:
19540 case WebAssembly::BI__builtin_wasm_max_u_i16x8:
19541 case WebAssembly::BI__builtin_wasm_min_s_i32x4:
19542 case WebAssembly::BI__builtin_wasm_min_u_i32x4:
19543 case WebAssembly::BI__builtin_wasm_max_s_i32x4:
19544 case WebAssembly::BI__builtin_wasm_max_u_i32x4: {
19545 Value *LHS = EmitScalarExpr(E->getArg(0));
19546 Value *RHS = EmitScalarExpr(E->getArg(1));
19547 Value *ICmp;
19548 switch (BuiltinID) {
19549 case WebAssembly::BI__builtin_wasm_min_s_i8x16:
19550 case WebAssembly::BI__builtin_wasm_min_s_i16x8:
19551 case WebAssembly::BI__builtin_wasm_min_s_i32x4:
19552 ICmp = Builder.CreateICmpSLT(LHS, RHS);
19553 break;
19554 case WebAssembly::BI__builtin_wasm_min_u_i8x16:
19555 case WebAssembly::BI__builtin_wasm_min_u_i16x8:
19556 case WebAssembly::BI__builtin_wasm_min_u_i32x4:
19557 ICmp = Builder.CreateICmpULT(LHS, RHS);
19558 break;
19559 case WebAssembly::BI__builtin_wasm_max_s_i8x16:
19560 case WebAssembly::BI__builtin_wasm_max_s_i16x8:
19561 case WebAssembly::BI__builtin_wasm_max_s_i32x4:
19562 ICmp = Builder.CreateICmpSGT(LHS, RHS);
19563 break;
19564 case WebAssembly::BI__builtin_wasm_max_u_i8x16:
19565 case WebAssembly::BI__builtin_wasm_max_u_i16x8:
19566 case WebAssembly::BI__builtin_wasm_max_u_i32x4:
19567 ICmp = Builder.CreateICmpUGT(LHS, RHS);
19568 break;
19569 default:
19570 llvm_unreachable("unexpected builtin ID");
19571 }
19572 return Builder.CreateSelect(ICmp, LHS, RHS);
19573 }
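// Illustrative note (editorial): these integer min/max builtins emit a
// generic compare-and-select; e.g. min_s on lanes <3, -1> and <2, 5>
// compares <false, true> and selects <2, -1>. The backend can later
// pattern-match the icmp/select pair into a single SIMD min/max
// instruction.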
19574 case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
19575 case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
19576 Value *LHS = EmitScalarExpr(E->getArg(0));
19577 Value *RHS = EmitScalarExpr(E->getArg(1));
19578 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
19579 ConvertType(E->getType()));
19580 return Builder.CreateCall(Callee, {LHS, RHS});
19581 }
19582 case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {
19583 Value *LHS = EmitScalarExpr(E->getArg(0));
19584 Value *RHS = EmitScalarExpr(E->getArg(1));
19585 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);
19586 return Builder.CreateCall(Callee, {LHS, RHS});
19587 }
19588 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
19589 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
19590 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
19591 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
19592 Value *Vec = EmitScalarExpr(E->getArg(0));
19593 unsigned IntNo;
19594 switch (BuiltinID) {
19595 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
19596 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
19597 IntNo = Intrinsic::wasm_extadd_pairwise_signed;
19598 break;
19599 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
19600 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
19601 IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
19602 break;
19603 default:
19604 llvm_unreachable("unexpected builtin ID");
19605 }
19606
19607 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
19608 return Builder.CreateCall(Callee, Vec);
19609 }
19610 case WebAssembly::BI__builtin_wasm_bitselect: {
19611 Value *V1 = EmitScalarExpr(E->getArg(0));
19612 Value *V2 = EmitScalarExpr(E->getArg(1));
19613 Value *C = EmitScalarExpr(E->getArg(2));
19614 Function *Callee =
19615 CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
19616 return Builder.CreateCall(Callee, {V1, V2, C});
19617 }
19618 case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
19619 Value *LHS = EmitScalarExpr(E->getArg(0));
19620 Value *RHS = EmitScalarExpr(E->getArg(1));
19621 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
19622 return Builder.CreateCall(Callee, {LHS, RHS});
19623 }
19624 case WebAssembly::BI__builtin_wasm_popcnt_i8x16: {
19625 Value *Vec = EmitScalarExpr(E->getArg(0));
19626 Function *Callee =
19627 CGM.getIntrinsic(Intrinsic::ctpop, ConvertType(E->getType()));
19628 return Builder.CreateCall(Callee, {Vec});
19629 }
19630 case WebAssembly::BI__builtin_wasm_any_true_v128:
19631 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
19632 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
19633 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
19634 case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
19635 unsigned IntNo;
19636 switch (BuiltinID) {
19637 case WebAssembly::BI__builtin_wasm_any_true_v128:
19638 IntNo = Intrinsic::wasm_anytrue;
19639 break;
19640 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
19641 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
19642 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
19643 case WebAssembly::BI__builtin_wasm_all_true_i64x2:
19644 IntNo = Intrinsic::wasm_alltrue;
19645 break;
19646 default:
19647 llvm_unreachable("unexpected builtin ID");
19648 }
19649 Value *Vec = EmitScalarExpr(E->getArg(0));
19650 Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
19651 return Builder.CreateCall(Callee, {Vec});
19652 }
19653 case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
19654 case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
19655 case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
19656 case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
19657 Value *Vec = EmitScalarExpr(E->getArg(0));
19658 Function *Callee =
19659 CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
19660 return Builder.CreateCall(Callee, {Vec});
19661 }
19662 case WebAssembly::BI__builtin_wasm_abs_f32x4:
19663 case WebAssembly::BI__builtin_wasm_abs_f64x2: {
19664 Value *Vec = EmitScalarExpr(E->getArg(0));
19665 Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
19666 return Builder.CreateCall(Callee, {Vec});
19667 }
19668 case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
19669 case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
19670 Value *Vec = EmitScalarExpr(E->getArg(0));
19671 Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
19672 return Builder.CreateCall(Callee, {Vec});
19673 }
19674 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
19675 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
19676 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
19677 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
19678 Value *Low = EmitScalarExpr(E->getArg(0));
19679 Value *High = EmitScalarExpr(E->getArg(1));
19680 unsigned IntNo;
19681 switch (BuiltinID) {
19682 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
19683 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
19684 IntNo = Intrinsic::wasm_narrow_signed;
19685 break;
19686 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
19687 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
19688 IntNo = Intrinsic::wasm_narrow_unsigned;
19689 break;
19690 default:
19691 llvm_unreachable("unexpected builtin ID");
19692 }
19693 Function *Callee =
19694 CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
19695 return Builder.CreateCall(Callee, {Low, High});
19696 }
19697 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
19698 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: {
19699 Value *Vec = EmitScalarExpr(E->getArg(0));
19700 unsigned IntNo;
19701 switch (BuiltinID) {
19702 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
19703 IntNo = Intrinsic::fptosi_sat;
19704 break;
19705 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4:
19706 IntNo = Intrinsic::fptoui_sat;
19707 break;
19708 default:
19709 llvm_unreachable("unexpected builtin ID");
19710 }
19711 llvm::Type *SrcT = Vec->getType();
19712 llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty());
19713 Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});
19714 Value *Trunc = Builder.CreateCall(Callee, Vec);
19715 Value *Splat = Constant::getNullValue(TruncT);
19716 return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3});
19717 }
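// Illustrative note (editorial): the saturating conversion of the
// <2 x double> input yields a <2 x i32> value; shuffling it against a
// zero splat with indices {0, 1, 2, 3} appends two zero lanes,
// producing the <4 x i32> "trunc_sat ... zero" result.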
19718 case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
19719 Value *Ops[18];
19720 size_t OpIdx = 0;
19721 Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
19722 Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
19723 while (OpIdx < 18) {
19724 std::optional<llvm::APSInt> LaneConst =
19725 E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
19726 assert(LaneConst && "Constant arg isn't actually constant?");
19727 Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
19728 }
19729 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
19730 return Builder.CreateCall(Callee, Ops);
19731 }
19732 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
19733 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
19734 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
19735 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: {
19736 Value *A = EmitScalarExpr(E->getArg(0));
19737 Value *B = EmitScalarExpr(E->getArg(1));
19738 Value *C = EmitScalarExpr(E->getArg(2));
19739 unsigned IntNo;
19740 switch (BuiltinID) {
19741 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
19742 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
19743 IntNo = Intrinsic::wasm_relaxed_madd;
19744 break;
19745 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
19746 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:
19747 IntNo = Intrinsic::wasm_relaxed_nmadd;
19748 break;
19749 default:
19750 llvm_unreachable("unexpected builtin ID");
19751 }
19752 Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
19753 return Builder.CreateCall(Callee, {A, B, C});
19754 }
19755 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16:
19756 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8:
19757 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4:
19758 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: {
19759 Value *A = EmitScalarExpr(E->getArg(0));
19760 Value *B = EmitScalarExpr(E->getArg(1));
19761 Value *C = EmitScalarExpr(E->getArg(2));
19762 Function *Callee =
19763 CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType());
19764 return Builder.CreateCall(Callee, {A, B, C});
19765 }
19766 case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
19767 Value *Src = EmitScalarExpr(E->getArg(0));
19768 Value *Indices = EmitScalarExpr(E->getArg(1));
19769 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);
19770 return Builder.CreateCall(Callee, {Src, Indices});
19771 }
19772 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
19773 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
19774 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
19775 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
19776 Value *LHS = EmitScalarExpr(E->getArg(0));
19777 Value *RHS = EmitScalarExpr(E->getArg(1));
19778 unsigned IntNo;
19779 switch (BuiltinID) {
19780 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
19781 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
19782 IntNo = Intrinsic::wasm_relaxed_min;
19783 break;
19784 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
19785 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
19786 IntNo = Intrinsic::wasm_relaxed_max;
19787 break;
19788 default:
19789 llvm_unreachable("unexpected builtin ID");
19790 }
19791 Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType());
19792 return Builder.CreateCall(Callee, {LHS, RHS});
19793 }
19794 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
19795 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
19796 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
19797 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: {
19798 Value *Vec = EmitScalarExpr(E->getArg(0));
19799 unsigned IntNo;
19800 switch (BuiltinID) {
19801 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
19802 IntNo = Intrinsic::wasm_relaxed_trunc_signed;
19803 break;
19804 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
19805 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
19806 break;
19807 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
19808 IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero;
19809 break;
19810 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2:
19811 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero;
19812 break;
19813 default:
19814 llvm_unreachable("unexpected builtin ID");
19815 }
19816 Function *Callee = CGM.getIntrinsic(IntNo);
19817 return Builder.CreateCall(Callee, {Vec});
19818 }
19819 case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: {
19820 Value *LHS = EmitScalarExpr(E->getArg(0));
19821 Value *RHS = EmitScalarExpr(E->getArg(1));
19822 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed);
19823 return Builder.CreateCall(Callee, {LHS, RHS});
19824 }
19825 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: {
19826 Value *LHS = EmitScalarExpr(E->getArg(0));
19827 Value *RHS = EmitScalarExpr(E->getArg(1));
19828 Function *Callee =
19829 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);
19830 return Builder.CreateCall(Callee, {LHS, RHS});
19831 }
19832 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: {
19833 Value *LHS = EmitScalarExpr(E->getArg(0));
19834 Value *RHS = EmitScalarExpr(E->getArg(1));
19835 Value *Acc = EmitScalarExpr(E->getArg(2));
19836 Function *Callee =
19837 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
19838 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
19839 }
19840 case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: {
19841 Value *LHS = EmitScalarExpr(E->getArg(0));
19842 Value *RHS = EmitScalarExpr(E->getArg(1));
19843 Value *Acc = EmitScalarExpr(E->getArg(2));
19844 Function *Callee =
19845 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32);
19846 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
19847 }
19848 case WebAssembly::BI__builtin_wasm_table_get: {
19849 assert(E->getArg(0)->getType()->isArrayType());
19850 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
19851 Value *Index = EmitScalarExpr(E->getArg(1));
19852 Function *Callee;
19853 if (E->getType().isWebAssemblyExternrefType())
19854 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref);
19855 else if (E->getType().isWebAssemblyFuncrefType())
19856 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref);
19857 else
19858 llvm_unreachable(
19859 "Unexpected reference type for __builtin_wasm_table_get");
19860 return Builder.CreateCall(Callee, {Table, Index});
19861 }
19862 case WebAssembly::BI__builtin_wasm_table_set: {
19863 assert(E->getArg(0)->getType()->isArrayType());
19864 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
19865 Value *Index = EmitScalarExpr(E->getArg(1));
19866 Value *Val = EmitScalarExpr(E->getArg(2));
19867 Function *Callee;
19868 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
19869 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_externref);
19870 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
19871 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_funcref);
19872 else
19873 llvm_unreachable(
19874 "Unexpected reference type for __builtin_wasm_table_set");
19875 return Builder.CreateCall(Callee, {Table, Index, Val});
19876 }
19877 case WebAssembly::BI__builtin_wasm_table_size: {
19878 assert(E->getArg(0)->getType()->isArrayType());
19879 Value *Value = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
19880 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size);
19881 return Builder.CreateCall(Callee, Value);
19882 }
19883 case WebAssembly::BI__builtin_wasm_table_grow: {
19884 assert(E->getArg(0)->getType()->isArrayType());
19885 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
19886 Value *Val = EmitScalarExpr(E->getArg(1));
19887 Value *NElems = EmitScalarExpr(E->getArg(2));
19888
19889 Function *Callee;
19890 if (E->getArg(1)->getType().isWebAssemblyExternrefType())
19891 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_externref);
19892 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
19893 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
19894 else
19895 llvm_unreachable(
19896 "Unexpected reference type for __builtin_wasm_table_grow");
19897
19898 return Builder.CreateCall(Callee, {Table, Val, NElems});
19899 }
19900 case WebAssembly::BI__builtin_wasm_table_fill: {
19901 assert(E->getArg(0)->getType()->isArrayType());
19902 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
19903 Value *Index = EmitScalarExpr(E->getArg(1));
19904 Value *Val = EmitScalarExpr(E->getArg(2));
19905 Value *NElems = EmitScalarExpr(E->getArg(3));
19906
19907 Function *Callee;
19908 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
19909 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_externref);
19910 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
19911 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
19912 else
19913 llvm_unreachable(
19914 "Unexpected reference type for __builtin_wasm_table_fill");
19915
19916 return Builder.CreateCall(Callee, {Table, Index, Val, NElems});
19917 }
19918 case WebAssembly::BI__builtin_wasm_table_copy: {
19919 assert(E->getArg(0)->getType()->isArrayType());
19920 Value *TableX = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
19921 Value *TableY = EmitArrayToPointerDecay(E->getArg(1)).getPointer();
19922 Value *DstIdx = EmitScalarExpr(E->getArg(2));
19923 Value *SrcIdx = EmitScalarExpr(E->getArg(3));
19924 Value *NElems = EmitScalarExpr(E->getArg(4));
19925
19926 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_copy);
19927
19928 return Builder.CreateCall(Callee, {TableX, TableY, SrcIdx, DstIdx, NElems});
19929 }
19930 default:
19931 return nullptr;
19932 }
19933}
19934
19935static std::pair<Intrinsic::ID, unsigned>
19936getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) {
19937 struct Info {
19938 unsigned BuiltinID;
19939 Intrinsic::ID IntrinsicID;
19940 unsigned VecLen;
19941 };
19942 static Info Infos[] = {
19943#define CUSTOM_BUILTIN_MAPPING(x,s) \
19944 { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
19945 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
19946 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
19947 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
19948 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
19949 CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
19950 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
19951 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
19952 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
19953 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
19954 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
19955 CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
19956 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
19957 CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
19958 CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
19959 CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
19960 CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
19961 CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
19962 CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
19963 CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
19964 CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
19965 CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
19966 CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
19967 // Legacy builtins that take a vector in place of a vector predicate.
19968 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
19969 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
19970 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
19971 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
19972 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
19973 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
19974 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
19975 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
19976#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
19977#undef CUSTOM_BUILTIN_MAPPING
19978 };
19979
19980 auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
19981 static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
19982 (void)SortOnce;
19983
19984 const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo);
19985 if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
19986 return {Intrinsic::not_intrinsic, 0};
19987
19988 return {F->IntrinsicID, F->VecLen};
19989}
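// Illustrative note (editorial): Infos is sorted by BuiltinID exactly
// once (the comma expression initializing SortOnce runs llvm::sort on
// the first call), so every lookup is a binary search. For example,
// querying Hexagon::BI__builtin_HEXAGON_L2_loadri_pci yields
// {Intrinsic::hexagon_L2_loadri_pci, 0}.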
19990
19991Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
19992 const CallExpr *E) {
19993 Intrinsic::ID ID;
19994 unsigned VecLen;
19995 std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID);
19996
19997 auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
19998 // The base pointer is passed by address, so it needs to be loaded.
19999 Address A = EmitPointerWithAlignment(E->getArg(0));
20000 Address BP = Address(Builder.CreateBitCast(
20001 A.getPointer(), Int8PtrTy->getPointerTo(0)), Int8PtrTy, A.getAlignment());
20002 llvm::Value *Base = Builder.CreateLoad(BP);
20003 // The treatment of both loads and stores is the same: the arguments for
20004 // the builtin are the same as the arguments for the intrinsic.
20005 // Load:
20006 // builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start)
20007 // builtin(Base, Mod, Start) -> intr(Base, Mod, Start)
20008 // Store:
20009 // builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
20010 // builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start)
20011 SmallVector<llvm::Value*,5> Ops = { Base };
20012 for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
20013 Ops.push_back(EmitScalarExpr(E->getArg(i)));
20014
20015 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
20016 // The load intrinsics generate two results (Value, NewBase), stores
20017 // generate one (NewBase). The new base address needs to be stored.
20018 llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1)
20019 : Result;
20020 llvm::Value *LV = EmitScalarExpr(E->getArg(0));
20021 Address Dest = EmitPointerWithAlignment(E->getArg(0));
20022 llvm::Value *RetVal =
20023 Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
20024 if (IsLoad)
20025 RetVal = Builder.CreateExtractValue(Result, 0);
20026 return RetVal;
20027 };
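// Illustrative sketch (editorial, hypothetical use): for a circular
// load such as
//   v = __builtin_HEXAGON_L2_loadri_pci(&Base, Inc, Mod, Start);
// MakeCircOp loads the current Base, calls the intrinsic to obtain
// {LoadedValue, NewBase}, stores NewBase back through the first
// argument, and returns LoadedValue.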
20028
20029 // Handle the conversion of bit-reverse load intrinsics to bit code.
20030 // The intrinsic call after this function only reads from memory and the
20031 // write to memory is handled by the store instruction.
20032 auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
20033 // The intrinsic generates one result, which is the new value for the base
20034 // pointer. It needs to be returned. The result of the load instruction is
20035 // passed to the intrinsic by address, so the value needs to be stored.
20036 llvm::Value *BaseAddress =
20037 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
20038
20039 // Expressions like &(*pt++) have side effects on each evaluation, so
20040 // EmitPointerWithAlignment and EmitScalarExpr evaluate the expression
20041 // only once per call.
20042 Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
20043 DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), Int8PtrTy),
20044 Int8Ty, DestAddr.getAlignment());
20045 llvm::Value *DestAddress = DestAddr.getPointer();
20046
20047 // Operands are Base, Dest, Modifier.
20048 // The intrinsic format in LLVM IR is defined as
20049 // { ValueType, i8* } (i8*, i32).
20050 llvm::Value *Result = Builder.CreateCall(
20051 CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))});
20052
20053 // The value needs to be stored as the variable is passed by reference.
20054 llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
20055
20056 // The store needs to be truncated to fit the destination type.
20057 // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
20058 // to be handled with stores of the respective destination type.
20059 DestVal = Builder.CreateTrunc(DestVal, DestTy);
20060
20061 Builder.CreateAlignedStore(DestVal, DestAddress, DestAddr.getAlignment());
20062 // The updated value of the base pointer is returned.
20063 return Builder.CreateExtractValue(Result, 1);
20064 };
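// Illustrative sketch (editorial, hypothetical use): a bit-reverse load
// such as
//   NewBase = __builtin_brev_ldh(Base, &Dst, Mod);
// calls { i32, i8* } llvm.hexagon.L2.loadrh.pbr(Base, Mod), truncates
// the loaded value to i16, stores it through the second argument, and
// returns the updated base pointer.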
20065
20066 auto V2Q = [this, VecLen] (llvm::Value *Vec) {
20067 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
20068 : Intrinsic::hexagon_V6_vandvrt;
20069 return Builder.CreateCall(CGM.getIntrinsic(ID),
20070 {Vec, Builder.getInt32(-1)});
20071 };
20072 auto Q2V = [this, VecLen] (llvm::Value *Pred) {
20073 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
20074 : Intrinsic::hexagon_V6_vandqrt;
20075 return Builder.CreateCall(CGM.getIntrinsic(ID),
20076 {Pred, Builder.getInt32(-1)});
20077 };
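// Illustrative note (editorial): V2Q/Q2V bridge the C-level vector view
// and the IR vector-predicate type: vandvrt with an all-ones scalar
// converts an HVX byte vector into a predicate, and vandqrt expands a
// predicate back into a vector.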
20078
20079 switch (BuiltinID) {
20080 // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
20081 // and the corresponding C/C++ builtins use loads/stores to update
20082 // the predicate.
20083 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
20084 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
20085 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
20086 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
20087 // Get the type from the 0-th argument.
20088 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
20089 Address PredAddr =
20090 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
20091 llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
20092 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
20093 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});
20094
20095 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
20096 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(),
20097 PredAddr.getAlignment());
20098 return Builder.CreateExtractValue(Result, 0);
20099 }
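// Illustrative sketch of the pattern above (editorial, hypothetical
// use):
//   Sum = __builtin_HEXAGON_V6_vaddcarry(A, B, &Pred);
// loads *Pred, converts it with V2Q, calls the intrinsic returning
// {Sum, CarryOut}, stores Q2V(CarryOut) back to *Pred, and returns Sum.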
20100 // These are identical to the builtins above, except they don't consume
20101 // input carry, only generate carry-out. Since they still produce two
20102 // outputs, generate the store of the predicate, but no load.
20103 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo:
20104 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B:
20105 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo:
20106 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: {
20107 // Get the type from the 0-th argument.
20108 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
20109 Address PredAddr =
20110 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
20111 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
20112 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
20113
20114 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
20115 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(),
20116 PredAddr.getAlignment());
20117 return Builder.CreateExtractValue(Result, 0);
20118 }
20119
20120 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:
20121 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:
20122 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq:
20123 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq:
20124 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B:
20125 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B:
20126 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B:
20127 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: {
20128 SmallVector<llvm::Value*,4> Ops;
20129 const Expr *PredOp = E->getArg(0);
20130 // There will be an implicit cast to a boolean vector. Strip it.
20131 if (auto *Cast = dyn_cast<ImplicitCastExpr>(PredOp)) {
20132 if (Cast->getCastKind() == CK_BitCast)
20133 PredOp = Cast->getSubExpr();
20134 Ops.push_back(V2Q(EmitScalarExpr(PredOp)));
20135 }
20136 for (int i = 1, e = E->getNumArgs(); i != e; ++i)
20137 Ops.push_back(EmitScalarExpr(E->getArg(i)));
20138 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
20139 }
20140
20141 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
20142 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
20143 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
20144 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
20145 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
20146 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
20147 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
20148 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
20149 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
20150 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
20151 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
20152 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
20153 return MakeCircOp(ID, /*IsLoad=*/true);
20154 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
20155 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
20156 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
20157 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
20158 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
20159 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
20160 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
20161 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
20162 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
20163 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
20164 return MakeCircOp(ID, /*IsLoad=*/false);
20165 case Hexagon::BI__builtin_brev_ldub:
20166 return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
20167 case Hexagon::BI__builtin_brev_ldb:
20168 return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
20169 case Hexagon::BI__builtin_brev_lduh:
20170 return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
20171 case Hexagon::BI__builtin_brev_ldh:
20172 return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
20173 case Hexagon::BI__builtin_brev_ldw:
20174 return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
20175 case Hexagon::BI__builtin_brev_ldd:
20176 return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
20177 } // switch
20178
20179 return nullptr;
20180}
20181
20182Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
20183 const CallExpr *E,
20184 ReturnValueSlot ReturnValue) {
20185 SmallVector<Value *, 4> Ops;
20186 llvm::Type *ResultType = ConvertType(E->getType());
20187
20188 // Find out if any arguments are required to be integer constant expressions.
20189 unsigned ICEArguments = 0;
20190 ASTContext::GetBuiltinTypeError Error;
20191 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
20192 if (Error == ASTContext::GE_Missing_type) {
20193 // Vector intrinsics don't have a type string.
20194 assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin &&
20195 BuiltinID <= clang::RISCV::LastRVVBuiltin);
20196 ICEArguments = 0;
20197 if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v ||
20198 BuiltinID == RISCVVector::BI__builtin_rvv_vset_v)
20199 ICEArguments = 1 << 1;
20200 } else {
20201 assert(Error == ASTContext::GE_None && "Unexpected error");
20202 }
20203
20204 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_load)
20205 ICEArguments |= (1 << 1);
20206 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_store)
20207 ICEArguments |= (1 << 2);
20208
20209 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
20210 // Handle aggregate argument, namely RVV tuple types in segment load/store
20211 if (hasAggregateEvaluationKind(E->getArg(i)->getType())) {
20212 LValue L = EmitAggExprToLValue(E->getArg(i));
20213 llvm::Value *AggValue = Builder.CreateLoad(L.getAddress(*this));
20214 Ops.push_back(AggValue);
20215 continue;
20216 }
20217
20218 // If this is a normal argument, just emit it as a scalar.
20219 if ((ICEArguments & (1 << i)) == 0) {
20220 Ops.push_back(EmitScalarExpr(E->getArg(i)));
20221 continue;
20222 }
20223
20224 // If this is required to be a constant, constant fold it so that we know
20225 // that the generated intrinsic gets a ConstantInt.
20226 Ops.push_back(llvm::ConstantInt::get(
20227 getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext())));
20228 }
20229
20230 Intrinsic::ID ID = Intrinsic::not_intrinsic;
20231 unsigned NF = 1;
20232 // The 0th bit simulates the `vta` of RVV
20233 // The 1st bit simulates the `vma` of RVV
20234 constexpr unsigned RVV_VTA = 0x1;
20235 constexpr unsigned RVV_VMA = 0x2;
20236 int PolicyAttrs = 0;
20237 bool IsMasked = false;
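// Illustrative note (editorial): PolicyAttrs packs the RVV tail/mask
// policy bits, so PolicyAttrs == (RVV_VTA | RVV_VMA) requests
// tail-agnostic and mask-agnostic behavior; the generated cases
// included below set it per builtin.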
20238
20239 // Required for overloaded intrinsics.
20240 SmallVector<llvm::Type *, 2> IntrinsicTypes;
20241 switch (BuiltinID) {
20242 default: llvm_unreachable("unexpected builtin ID");
20243 case RISCV::BI__builtin_riscv_orc_b_32:
20244 case RISCV::BI__builtin_riscv_orc_b_64:
20245 case RISCV::BI__builtin_riscv_clz_32:
20246 case RISCV::BI__builtin_riscv_clz_64:
20247 case RISCV::BI__builtin_riscv_ctz_32:
20248 case RISCV::BI__builtin_riscv_ctz_64:
20249 case RISCV::BI__builtin_riscv_clmul_32:
20250 case RISCV::BI__builtin_riscv_clmul_64:
20251 case RISCV::BI__builtin_riscv_clmulh_32:
20252 case RISCV::BI__builtin_riscv_clmulh_64:
20253 case RISCV::BI__builtin_riscv_clmulr_32:
20254 case RISCV::BI__builtin_riscv_clmulr_64:
20255 case RISCV::BI__builtin_riscv_xperm4_32:
20256 case RISCV::BI__builtin_riscv_xperm4_64:
20257 case RISCV::BI__builtin_riscv_xperm8_32:
20258 case RISCV::BI__builtin_riscv_xperm8_64:
20259 case RISCV::BI__builtin_riscv_brev8_32:
20260 case RISCV::BI__builtin_riscv_brev8_64:
20261 case RISCV::BI__builtin_riscv_zip_32:
20262 case RISCV::BI__builtin_riscv_unzip_32: {
20263 switch (BuiltinID) {
20264 default: llvm_unreachable("unexpected builtin ID");
20265 // Zbb
20266 case RISCV::BI__builtin_riscv_orc_b_32:
20267 case RISCV::BI__builtin_riscv_orc_b_64:
20268 ID = Intrinsic::riscv_orc_b;
20269 break;
20270 case RISCV::BI__builtin_riscv_clz_32:
20271 case RISCV::BI__builtin_riscv_clz_64: {
20272 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
20273 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
20274 if (Result->getType() != ResultType)
20275 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
20276 "cast");
20277 return Result;
20278 }
20279 case RISCV::BI__builtin_riscv_ctz_32:
20280 case RISCV::BI__builtin_riscv_ctz_64: {
20281 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
20282 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
20283 if (Result->getType() != ResultType)
20284 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
20285 "cast");
20286 return Result;
20287 }
20288
20289 // Zbc
20290 case RISCV::BI__builtin_riscv_clmul_32:
20291 case RISCV::BI__builtin_riscv_clmul_64:
20292 ID = Intrinsic::riscv_clmul;
20293 break;
20294 case RISCV::BI__builtin_riscv_clmulh_32:
20295 case RISCV::BI__builtin_riscv_clmulh_64:
20296 ID = Intrinsic::riscv_clmulh;
20297 break;
20298 case RISCV::BI__builtin_riscv_clmulr_32:
20299 case RISCV::BI__builtin_riscv_clmulr_64:
20300 ID = Intrinsic::riscv_clmulr;
20301 break;
20302
20303 // Zbkx
20304 case RISCV::BI__builtin_riscv_xperm8_32:
20305 case RISCV::BI__builtin_riscv_xperm8_64:
20306 ID = Intrinsic::riscv_xperm8;
20307 break;
20308 case RISCV::BI__builtin_riscv_xperm4_32:
20309 case RISCV::BI__builtin_riscv_xperm4_64:
20310 ID = Intrinsic::riscv_xperm4;
20311 break;
20312
20313 // Zbkb
20314 case RISCV::BI__builtin_riscv_brev8_32:
20315 case RISCV::BI__builtin_riscv_brev8_64:
20316 ID = Intrinsic::riscv_brev8;
20317 break;
20318 case RISCV::BI__builtin_riscv_zip_32:
20319 ID = Intrinsic::riscv_zip;
20320 break;
20321 case RISCV::BI__builtin_riscv_unzip_32:
20322 ID = Intrinsic::riscv_unzip;
20323 break;
20324 }
20325
20326 IntrinsicTypes = {ResultType};
20327 break;
20328 }
20329
20330 // Zk builtins
20331
20332 // Zknh
20333 case RISCV::BI__builtin_riscv_sha256sig0:
20334 ID = Intrinsic::riscv_sha256sig0;
20335 break;
20336 case RISCV::BI__builtin_riscv_sha256sig1:
20337 ID = Intrinsic::riscv_sha256sig1;
20338 break;
20339 case RISCV::BI__builtin_riscv_sha256sum0:
20340 ID = Intrinsic::riscv_sha256sum0;
20341 break;
20342 case RISCV::BI__builtin_riscv_sha256sum1:
20343 ID = Intrinsic::riscv_sha256sum1;
20344 break;
20345
20346 // Zksed
20347 case RISCV::BI__builtin_riscv_sm4ks:
20348 ID = Intrinsic::riscv_sm4ks;
20349 break;
20350 case RISCV::BI__builtin_riscv_sm4ed:
20351 ID = Intrinsic::riscv_sm4ed;
20352 break;
20353
20354 // Zksh
20355 case RISCV::BI__builtin_riscv_sm3p0:
20356 ID = Intrinsic::riscv_sm3p0;
20357 break;
20358 case RISCV::BI__builtin_riscv_sm3p1:
20359 ID = Intrinsic::riscv_sm3p1;
20360 break;
20361
20362 // Zihintntl
20363 case RISCV::BI__builtin_riscv_ntl_load: {
20364 llvm::Type *ResTy = ConvertType(E->getType());
20365 ConstantInt *Mode = cast<ConstantInt>(Ops[1]);
20366
20367 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
20368 getLLVMContext(),
20369 llvm::ConstantAsMetadata::get(Builder.getInt32(Mode->getZExtValue())));
20370 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
20371 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
20372
20373 int Width;
20374 if (ResTy->isScalableTy()) {
20375 const ScalableVectorType *SVTy = cast<ScalableVectorType>(ResTy);
20376 llvm::Type *ScalarTy = ResTy->getScalarType();
20377 Width = ScalarTy->getPrimitiveSizeInBits() *
20378 SVTy->getElementCount().getKnownMinValue();
20379 } else
20380 Width = ResTy->getPrimitiveSizeInBits();
20381 LoadInst *Load = Builder.CreateLoad(
20382 Address(Ops[0], ResTy, CharUnits::fromQuantity(Width / 8)));
20383
20384 Load->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
20385 Load->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
20386 RISCVDomainNode);
20387
20388 return Load;
20389 }
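  // Illustrative sketch of the emitted IR (values assumed): the load carries
  // both metadata kinds, e.g.
  //   %v = load i32, ptr %p, align 4, !nontemporal !0, !riscv-nontemporal-domain !1
  //   !0 = !{i32 1}
  //   !1 = !{i32 <domain>}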
20390 case RISCV::BI__builtin_riscv_ntl_store: {
20391 ConstantInt *Mode = cast<ConstantInt>(Ops[2]);
20392
20393 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
20394 getLLVMContext(),
20395 llvm::ConstantAsMetadata::get(Builder.getInt32(Mode->getZExtValue())));
20396 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
20397 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
20398
20399 Value *BC = Builder.CreateBitCast(
20400 Ops[0], llvm::PointerType::getUnqual(Ops[1]->getType()), "cast");
20401
20402 StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], BC);
20403 Store->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
20404 Store->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
20405 RISCVDomainNode);
20406
20407 return Store;
20408 }
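  // Illustrative note: under opaque pointers the bitcast above is a no-op;
  // the store mirrors the load case, e.g.
  //   store i32 %v, ptr %p, align 4, !nontemporal !0, !riscv-nontemporal-domain !1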
20409
20410 // Vector builtins are handled from here.
20411#include "clang/Basic/riscv_vector_builtin_cg.inc"
20412 // SiFive Vector builtins are handled from here.
20413#include "clang/Basic/riscv_sifive_vector_builtin_cg.inc"
20414 }
20415
20416 assert(ID != Intrinsic::not_intrinsic);
20417
20418 llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
20419 return Builder.CreateCall(F, Ops, "");
20420}
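// Illustrative sketch (assumed example, not part of the source): a scalar
// builtin such as
//   uint32_t f(uint32_t x) { return __builtin_riscv_clmul_32(x, 7); }
// reaches the common exit above with ID == Intrinsic::riscv_clmul and
// IntrinsicTypes == {i32}, producing
//   %0 = call i32 @llvm.riscv.clmul.i32(i32 %x, i32 7)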
20421
20422Value *CodeGenFunction::EmitLoongArchBuiltinExpr(unsigned BuiltinID,
20423 const CallExpr *E) {
20424 SmallVector<Value *, 4> Ops;
20425
20426 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
20427 Ops.push_back(EmitScalarExpr(E->getArg(i)));
20428
20429 Intrinsic::ID ID = Intrinsic::not_intrinsic;
20430
20431 switch (BuiltinID) {
20432 default:
20433 llvm_unreachable("unexpected builtin ID.");
20434 case LoongArch::BI__builtin_loongarch_cacop_d:
20435 ID = Intrinsic::loongarch_cacop_d;
20436 break;
20437 case LoongArch::BI__builtin_loongarch_cacop_w:
20438 ID = Intrinsic::loongarch_cacop_w;
20439 break;
20440 case LoongArch::BI__builtin_loongarch_dbar:
20441 ID = Intrinsic::loongarch_dbar;
20442 break;
20443 case LoongArch::BI__builtin_loongarch_break:
20444 ID = Intrinsic::loongarch_break;
20445 break;
20446 case LoongArch::BI__builtin_loongarch_ibar:
20447 ID = Intrinsic::loongarch_ibar;
20448 break;
20449 case LoongArch::BI__builtin_loongarch_movfcsr2gr:
20450 ID = Intrinsic::loongarch_movfcsr2gr;
20451 break;
20452 case LoongArch::BI__builtin_loongarch_movgr2fcsr:
20453 ID = Intrinsic::loongarch_movgr2fcsr;
20454 break;
20455 case LoongArch::BI__builtin_loongarch_syscall:
20456 ID = Intrinsic::loongarch_syscall;
20457 break;
20458 case LoongArch::BI__builtin_loongarch_crc_w_b_w:
20459 ID = Intrinsic::loongarch_crc_w_b_w;
20460 break;
20461 case LoongArch::BI__builtin_loongarch_crc_w_h_w:
20462 ID = Intrinsic::loongarch_crc_w_h_w;
20463 break;
20464 case LoongArch::BI__builtin_loongarch_crc_w_w_w:
20465 ID = Intrinsic::loongarch_crc_w_w_w;
20466 break;
20467 case LoongArch::BI__builtin_loongarch_crc_w_d_w:
20468 ID = Intrinsic::loongarch_crc_w_d_w;
20469 break;
20470 case LoongArch::BI__builtin_loongarch_crcc_w_b_w:
20471 ID = Intrinsic::loongarch_crcc_w_b_w;
20472 break;
20473 case LoongArch::BI__builtin_loongarch_crcc_w_h_w:
20474 ID = Intrinsic::loongarch_crcc_w_h_w;
20475 break;
20476 case LoongArch::BI__builtin_loongarch_crcc_w_w_w:
20477 ID = Intrinsic::loongarch_crcc_w_w_w;
20478 break;
20479 case LoongArch::BI__builtin_loongarch_crcc_w_d_w:
20480 ID = Intrinsic::loongarch_crcc_w_d_w;
20481 break;
20482 case LoongArch::BI__builtin_loongarch_csrrd_w:
20483 ID = Intrinsic::loongarch_csrrd_w;
20484 break;
20485 case LoongArch::BI__builtin_loongarch_csrwr_w:
20486 ID = Intrinsic::loongarch_csrwr_w;
20487 break;
20488 case LoongArch::BI__builtin_loongarch_csrxchg_w:
20489 ID = Intrinsic::loongarch_csrxchg_w;
20490 break;
20491 case LoongArch::BI__builtin_loongarch_csrrd_d:
20492 ID = Intrinsic::loongarch_csrrd_d;
20493 break;
20494 case LoongArch::BI__builtin_loongarch_csrwr_d:
20495 ID = Intrinsic::loongarch_csrwr_d;
20496 break;
20497 case LoongArch::BI__builtin_loongarch_csrxchg_d:
20498 ID = Intrinsic::loongarch_csrxchg_d;
20499 break;
20500 case LoongArch::BI__builtin_loongarch_iocsrrd_b:
20501 ID = Intrinsic::loongarch_iocsrrd_b;
20502 break;
20503 case LoongArch::BI__builtin_loongarch_iocsrrd_h:
20504 ID = Intrinsic::loongarch_iocsrrd_h;
20505 break;
20506 case LoongArch::BI__builtin_loongarch_iocsrrd_w:
20507 ID = Intrinsic::loongarch_iocsrrd_w;
20508 break;
20509 case LoongArch::BI__builtin_loongarch_iocsrrd_d:
20510 ID = Intrinsic::loongarch_iocsrrd_d;
20511 break;
20512 case LoongArch::BI__builtin_loongarch_iocsrwr_b:
20513 ID = Intrinsic::loongarch_iocsrwr_b;
20514 break;
20515 case LoongArch::BI__builtin_loongarch_iocsrwr_h:
20516 ID = Intrinsic::loongarch_iocsrwr_h;
20517 break;
20518 case LoongArch::BI__builtin_loongarch_iocsrwr_w:
20519 ID = Intrinsic::loongarch_iocsrwr_w;
20520 break;
20521 case LoongArch::BI__builtin_loongarch_iocsrwr_d:
20522 ID = Intrinsic::loongarch_iocsrwr_d;
20523 break;
20524 case LoongArch::BI__builtin_loongarch_cpucfg:
20525 ID = Intrinsic::loongarch_cpucfg;
20526 break;
20527 case LoongArch::BI__builtin_loongarch_asrtle_d:
20528 ID = Intrinsic::loongarch_asrtle_d;
20529 break;
20530 case LoongArch::BI__builtin_loongarch_asrtgt_d:
20531 ID = Intrinsic::loongarch_asrtgt_d;
20532 break;
20533 case LoongArch::BI__builtin_loongarch_lddir_d:
20534 ID = Intrinsic::loongarch_lddir_d;
20535 break;
20536 case LoongArch::BI__builtin_loongarch_ldpte_d:
20537 ID = Intrinsic::loongarch_ldpte_d;
20538 break;
20539 // TODO: Support more Intrinsics.
20540 }
20541
20542 assert(ID != Intrinsic::not_intrinsic);
20543
20544 llvm::Function *F = CGM.getIntrinsic(ID);
20545 return Builder.CreateCall(F, Ops);
20546}
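// Illustrative sketch (assumed example, not part of the source): these
// builtins map one-to-one onto non-overloaded intrinsics, which is why
// CGM.getIntrinsic(ID) is called without a type list; for instance
//   __builtin_loongarch_dbar(0);
// lowers to
//   call void @llvm.loongarch.dbar(i32 0)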
Defines the clang::ASTContext interface.
#define V(N, I)
DynTypedNode Node
StringRef P
#define X86_CPU_SUBTYPE(ENUM, STR)
#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS)
#define X86_VENDOR(ENUM, STRING)
#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS)
#define X86_CPU_TYPE(ENUM, STR)
auto * N
static constexpr Builtin::Info BuiltinInfo[]
Definition Builtins.cpp:32
llvm::Error Error
static void Accumulate(SMap &SM, CFGBlock *B)
static Value * EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, SpecialRegisterAccessKind AccessKind, StringRef SysReg="")
static llvm::Value * ARMMVEVectorReinterpret(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *DestType)
static Value * MakeBinaryAtomicValue(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Utility to insert an atomic instruction based on Intrinsic::ID and the expression node.
static char bitActionToX86BTCode(BitTest::ActionKind A)
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)
static Value * EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering)
#define INTRINSIC_X86_XSAVE_ID(NAME)
static CanQualType getOSLogArgType(ASTContext &C, int Size)
Get the argument type for arguments to os_log_helper.
static llvm::VectorType * GetFloatNeonType(CodeGenFunction *CGF, NeonTypeFlags IntTypeFlags)
static Value * tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID, Value *V)
static llvm::Value * MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, uint32_t Shift, bool Unsigned)
static bool areBOSTypesCompatible(int From, int To)
Checks if using the result of __builtin_object_size(p, From) in place of __builtin_object_size(p,...
static llvm::Value * SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, llvm::Type *T, bool Unsigned)
static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[]
static Value * EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< Value * > &Ops, llvm::Triple::ArchType Arch)
#define MMA_VARIANTS_B1_AND(geom, type)
static bool AArch64SISDIntrinsicsProvenSorted
static Value * EmitX86CompressExpand(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsCompress)
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[]
static bool HasExtraNeonArgument(unsigned BuiltinID)
Return true if BuiltinID is an overloaded Neon intrinsic with an extra argument that specifies the ve...
static bool TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty, llvm::SmallPtrSetImpl< const Decl * > &Seen)
static Value * EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
static std::pair< Intrinsic::ID, unsigned > getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID)
static Value * emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, int low, int high)
@ UnsignedAlts
@ Vectorize1ArgType
@ FpCmpzModifiers
@ Use64BitVectors
@ VectorizeArgTypes
@ VectorRetGetArgs01
@ InventFloatType
@ AddRetType
@ Add2ArgTypes
@ VectorizeRetType
@ VectorRet
@ Add1ArgType
@ Use128BitVectors
#define MMA_INTR(geom_op_type, layout)
static Value * EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, bool Signed, ArrayRef< Value * > Ops)
static Value * emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)
static bool AArch64SVEIntrinsicsProvenSorted
static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, const CallExpr *E)
MSVC handles setjmp a bit differently on different platforms.
static const ARMVectorIntrinsicInfo * findARMVectorIntrinsicInMap(ArrayRef< ARMVectorIntrinsicInfo > IntrinsicMap, unsigned BuiltinID, bool &MapProvenSorted)
static Value * EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E, MutableArrayRef< Value * > Ops, Value *Upper, bool ZeroMask=false, unsigned PTIdx=0, bool NegAcc=false)
#define MUTATE_LDBL(func)
static Value * EmitX86ExpandLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static Value * emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
static Value * EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty)
Determine if the specified type requires laundering by checking if it is a dynamic class type or cont...
static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static Value * EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E)
static struct WidthAndSignedness EncompassingIntegerType(ArrayRef< struct WidthAndSignedness > Types)
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context)
#define MMA_VARIANTS(geom, type)
static bool AArch64SMEIntrinsicsProvenSorted
static llvm::Value * VectorZip(CGBuilderTy &Builder, llvm::Value *V0, llvm::Value *V1)
static Value * EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
constexpr unsigned SVEBitsPerBlock
static std::optional< CodeGenFunction::MSVCIntrin > translateX86ToMsvcIntrin(unsigned BuiltinID)
static const std::pair< unsigned, unsigned > NEONEquivalentIntrinsicMap[]
#define NEONMAP0(NameBase)
static Value * EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static Value * emitBinaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
static Value * emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, Instruction::BinaryOps Op, bool Invert=false)
Utility to insert an atomic instruction based Intrinsic::ID and the expression node,...
static Value * EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned, ArrayRef< Value * > Ops)
static llvm::Value * ARMMVEVectorElementReverse(CGBuilderTy &Builder, llvm::Value *V, unsigned ReverseWidth)
#define MMA_SATF_VARIANTS(geom, type)
static std::optional< CodeGenFunction::MSVCIntrin > translateAarch64ToMsvcIntrin(unsigned BuiltinID)
static std::optional< CodeGenFunction::MSVCIntrin > translateArmToMsvcIntrin(unsigned BuiltinID)
static llvm::Value * EmitBitTestIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Emit a _bittest* intrinsic.
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[]
static Value * EmitSignBit(CodeGenFunction &CGF, Value *V)
Emit the computation of the sign bit for a floating point value.
static Value * EmitFAbs(CodeGenFunction &CGF, Value *V)
EmitFAbs - Emit a call to @llvm.fabs().
#define CUSTOM_BUILTIN_MAPPING(x, s)
static Value * EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF, ArrayRef< Value * > Ops, llvm::Type *DstTy)
static bool isSpecialUnsignedMultiplySignedResult(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
static llvm::Value * getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType)
static llvm::Value * emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
static llvm::Value * CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E)
static llvm::Value * VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd)
static Value * EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, llvm::Type *DstTy)
static Value * emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
static WidthAndSignedness getIntegerWidthAndSignedness(const clang::ASTContext &context, const clang::QualType Type)
static Value * EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, Value *Amt, bool IsRight)
static RValue EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Emit a checked mixed-sign multiply.
static llvm::ScalableVectorType * getSVEVectorForElementType(llvm::Type *EltTy)
static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID)
#define INTRINSIC_WITH_CC(NAME)
static llvm::FixedVectorType * GetNeonType(CodeGenFunction *CGF, NeonTypeFlags TypeFlags, bool HasLegalHalfType=true, bool V1Ty=false, bool AllowBFloatArgsAndRet=true)
static RValue EmitBinaryAtomic(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E)
static llvm::Value * ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT)
static Value * EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, ArrayRef< Value * > Ops, bool InvertLHS=false)
static Value * EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::Type *ResultType)
static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, Align AlignmentInBytes)
Definition CGBuiltin.cpp:68
static Value * EmitX86Select(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
SpecialRegisterAccessKind
@ VolatileRead
@ NormalRead
@ Write
static Value * EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering=AtomicOrdering::SequentiallyConsistent)
This function should be invoked to emit atomic cmpxchg for Microsoft's _InterlockedCompareExchange* i...
static Value * EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, unsigned BuiltinID, bool IsAddSub)
static Value * getMaskVecValue(CodeGenFunction &CGF, Value *Mask, unsigned NumElts)
static bool isSpecialMixedSignMultiply(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Determine if a binop is a checked mixed-sign multiply we can specialize.
static Value * MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, bool ReturnBool)
Utility to insert an atomic cmpxchg instruction.
static Value * emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID, llvm::Intrinsic::ID ConstrainedIntrinsicID)
static Value * EmitToInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::IntegerType *IntType)
Emit the conversions required to turn the given value into an integer of the given size.
static llvm::Value * ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V)
static Value * EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, unsigned NumElts, Value *MaskIn)
static Value * EmitX86CompressStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static bool NEONSIMDIntrinsicsProvenSorted
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[]
static Value * EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E)
static llvm::Value * EmitOverflowIntrinsic(CodeGenFunction &CGF, const llvm::Intrinsic::ID IntrinsicID, llvm::Value *X, llvm::Value *Y, llvm::Value *&Carry)
Emit a call to llvm.
static Value * EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo, SmallVectorImpl< Value * > &Ops, const CallExpr *E)
static Value * emitFPIntBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
static Value * emitTernaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
#define MMA_LDST(n, geom_op_type)
static Value * EmitX86vpcom(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsSigned)
static Value * emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID)
static Value * EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In)
static Value * EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E)
static Value * EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, unsigned IntrinsicID, const CallExpr *E)
Handle a SystemZ function in which the final argument is a pointer to an int that receives the post-i...
static llvm::Value * EmitX86BitTestIntrinsic(CodeGenFunction &CGF, BitTest BT, const CallExpr *E, Value *BitBase, Value *BitPos)
static RValue EmitCheckedUnsignedMultiplySignedResult(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
static Value * emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, llvm::StringRef Name="")
static Value * emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID, llvm::Type *Ty, ArrayRef< Value * > Args)
static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, const CallExpr *E, llvm::Constant *calleeValue)
static Value * EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E)
static Value * EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask, ArrayRef< Value * > Ops)
static llvm::AtomicOrdering getBitTestAtomicOrdering(BitTest::InterlockingKind I)
#define MMA_VARIANTS_B1_XOR(geom, type)
#define MMA_VARIANTS_I4(geom, type)
static Value * EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, bool IsSigned)
static Value * packTBLDVectorList(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Value *ExtOp, Value *IndexOp, llvm::Type *ResTy, unsigned IntID, const char *Name)
static Value * EmitX86ScalarSelect(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
static Value * EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
static bool AArch64SIMDIntrinsicsProvenSorted
static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[]
CodeGenFunction::ComplexPairTy ComplexPairTy
llvm::Expected< T > Expected
TokenType getType() const
Returns the token's type, e.g.
FormatToken * Next
The next token in the unwrapped line.
unsigned Offset
Definition Format.cpp:2924
#define ALIAS(NAME, TOK, FLAGS)
#define X(type, name)
Definition Value.h:94
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
const char * Data
Enumerates target-specific builtins in their own namespaces within namespace clang.
SourceLocation Begin
__DEVICE__ float modf(float __x, float *__iptr)
__DEVICE__ double nan(const char *)
__device__ __2f16 float __ockl_bool s
APSInt & getInt()
Definition APValue.h:415
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:182
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
unsigned getIntWidth(QualType T) const
CanQualType VoidPtrTy
IdentifierTable & Idents
Definition ASTContext.h:630
Builtin::Context & BuiltinInfo
Definition ASTContext.h:632
QualType getBaseElementType(const ArrayType *VAT) const
Return the innermost element type of an array type.
TypeInfo getTypeInfo(const Type *T) const
Get the size and alignment of the specified complete type in bits.
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArrayType::ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
QualType getObjCIdType() const
Represents the Objective-CC id type.
bool hasSameUnqualifiedType(QualType T1, QualType T2) const
Determine whether the given types are equivalent after cvr-qualifiers have been removed.
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType VoidTy
QualType GetBuiltinType(unsigned ID, GetBuiltinTypeError &Error, unsigned *IntegerConstantArgs=nullptr) const
Return the type for the specified builtin.
const TargetInfo & getTargetInfo() const
Definition ASTContext.h:743
CharUnits toCharUnitsFromBits(int64_t BitSize) const
Convert a size in bits to a size in characters.
unsigned getTargetAddressSpace(LangAS AS) const
@ GE_None
No error.
@ GE_Missing_type
Missing a type.
bool isLibFunction(unsigned ID) const
Return true if this is a builtin for a libc/libm function, with a "__builtin_" prefix (e....
Definition Builtins.h:149
llvm::StringRef getName(unsigned ID) const
Return the identifier name for the specified builtin, e.g.
Definition Builtins.h:103
bool isConstWithoutErrnoAndExceptions(unsigned ID) const
Return true if this function has no side effects and doesn't read memory, except for possibly errno o...
Definition Builtins.h:247
bool isConstWithoutExceptions(unsigned ID) const
Definition Builtins.h:251
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition Expr.h:2831
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition Expr.h:3022
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Expr.cpp:1649
FunctionDecl * getDirectCallee()
If the callee is a FunctionDecl, return it. Otherwise return null.
Definition Expr.h:3001
Expr * getCallee()
Definition Expr.h:2981
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition Expr.h:3009
arg_range arguments()
Definition Expr.h:3070
QualType getCallReturnType(const ASTContext &Ctx) const
getCallReturnType - Get the return type of the call expr.
Definition Expr.cpp:1605
CharUnits - This is an opaque type for sizes expressed in character units.
Definition CharUnits.h:38
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition CharUnits.h:185
static CharUnits One()
One - Construct a CharUnits quantity of one.
Definition CharUnits.h:58
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition CharUnits.h:63
XRayInstrSet XRayInstrumentationBundle
Set of XRay instrumentation kinds to emit.
An aligned address.
Definition Address.h:29
static Address invalid()
Definition Address.h:46
CharUnits getAlignment() const
Return the alignment of this pointer.
Definition Address.h:78
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:62
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition Address.h:100
llvm::Value * getPointer() const
Definition Address.h:51
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition Address.h:57
An aggregate value slot.
Definition CGValue.h:512
Address getAddress() const
Definition CGValue.h:650
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition CGBuilder.h:97
llvm::StoreInst * CreateAlignedStore(llvm::Value *Val, llvm::Value *Addr, CharUnits Align, bool IsVolatile=false)
Definition CGBuilder.h:104
llvm::AtomicRMWInst * CreateAtomicRMW(llvm::AtomicRMWInst::BinOp Op, llvm::Value *Ptr, llvm::Value *Val, llvm::AtomicOrdering Ordering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition CGBuilder.h:144
llvm::CallInst * CreateMemSet(Address Dest, llvm::Value *Value, llvm::Value *Size, bool IsVolatile=false)
Definition CGBuilder.h:329
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition CGBuilder.h:71
llvm::LoadInst * CreateAlignedLoad(llvm::Type *Ty, llvm::Value *Addr, CharUnits Align, const llvm::Twine &Name="")
Definition CGBuilder.h:89
Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
Definition CGBuilder.h:152
llvm::AtomicCmpXchgInst * CreateAtomicCmpXchg(llvm::Value *Ptr, llvm::Value *Cmp, llvm::Value *New, llvm::AtomicOrdering SuccessOrdering, llvm::AtomicOrdering FailureOrdering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition CGBuilder.h:132
Address CreateGEP(Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition CGBuilder.h:249
virtual std::string getDeviceSideName(const NamedDecl *ND)=0
Returns function or variable name on device side even if the current compilation is for host.
virtual llvm::GlobalVariable * getThrowInfo(QualType T)
Definition CGCXXABI.h:263
All available information about a concrete callee.
Definition CGCall.h:61
static CGCallee forDirect(llvm::Constant *functionPtr, const CGCalleeInfo &abstractInfo=CGCalleeInfo())
Definition CGCall.h:128
llvm::DIType * getOrCreateStandaloneType(QualType Ty, SourceLocation Loc)
Emit standalone debug info for a type.
CGFunctionInfo - Class to encapsulate the information about a function definition.
virtual void EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF, Address DestPtr, Address SrcPtr, llvm::Value *Size)=0
EnqueuedBlockInfo emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E)
CallArgList - Type for representing both the value and type of arguments in a call.
Definition CGCall.h:257
void add(RValue rvalue, QualType type)
Definition CGCall.h:281
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Value * EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch)
llvm::Value * EmitFromMemory(llvm::Value *Value, QualType Ty)
EmitFromMemory - Change a scalar value from its memory representation to its value representation.
llvm::Value * EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
llvm::Value * EmitLifetimeStart(llvm::TypeSize Size, llvm::Value *Addr)
std::pair< RValue, llvm::Value * > EmitAtomicCompareExchange(LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc, llvm::AtomicOrdering Success=llvm::AtomicOrdering::SequentiallyConsistent, llvm::AtomicOrdering Failure=llvm::AtomicOrdering::SequentiallyConsistent, bool IsWeak=false, AggValueSlot Slot=AggValueSlot::ignored())
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
llvm::Value * EmitSVEPredicateCast(llvm::Value *Pred, llvm::ScalableVectorType *VTy)
llvm::CallInst * EmitTrapCall(llvm::Intrinsic::ID IntrID)
Emit a call to trap or debugtrap and attach function attribute "trap-func-name" if specified.
llvm::Value * EmitLoongArchBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitTileslice(llvm::Value *Offset, llvm::Value *Base)
SanitizerSet SanOpts
Sanitizers enabled for this function.
RValue EmitBuiltinIsAligned(const CallExpr *E)
Emit IR for __builtin_is_aligned.
LValue EmitAggExprToLValue(const Expr *E)
EmitAggExprToLValue - Emit the computation of the specified expression of aggregate type into a tempo...
void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc, AbstractCallee AC, unsigned ParmNum)
Create a check for a function parameter that may potentially be declared as non-null.
llvm::Value * EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr)
void pushLifetimeExtendedDestroy(CleanupKind kind, Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
llvm::Value * EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind)
Emits an argument for a call to a builtin.
llvm::Value * EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
CleanupKind getARCCleanupKind()
Retrieves the default cleanup kind for an ARC cleanup.
llvm::Value * EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue emitBuiltinOSLogFormat(const CallExpr &E)
Emit IR for __builtin_os_log_format.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
llvm::Value * EmitVAStartEnd(llvm::Value *ArgValue, bool IsStart)
Emits a call to an LLVM variable-argument intrinsic, either llvm.va_start or llvm....
llvm::Value * EmitSVEMaskedStore(const CallExpr *, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty)
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
llvm::Value * EmitSEHExceptionInfo()
RValue EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp)
Emit IR for __builtin_align_up/__builtin_align_down.
const LangOptions & getLangOpts() const
void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, llvm::AtomicOrdering &AO, llvm::SyncScope::ID &SSID)
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
llvm::Value * EmitSMELdrStr(SVETypeFlags TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
void EmitUnreachable(SourceLocation Loc)
Emit a reached-unreachable diagnostic if Loc is valid and runtime checking is enabled.
llvm::Value * EmitSVETupleCreate(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
ComplexPairTy EmitComplexExpr(const Expr *E, bool IgnoreReal=false, bool IgnoreImag=false)
EmitComplexExpr - Emit the computation of the specified expression of complex type,...
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, llvm::CallBase **callOrInvoke, bool IsMustTail, SourceLocation Loc)
EmitCall - Generate a call of the given function, expecting the given result type,...
llvm::Value * EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx, const llvm::ElementCount &Count)
llvm::Type * ConvertTypeForMem(QualType T)
llvm::Value * EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID, bool IsZExtReturn)
bool AlwaysEmitXRayCustomEvents() const
AlwaysEmitXRayCustomEvents - Return true if we must unconditionally emit XRay custom event handling c...
llvm::Value * EmitSVEDupX(llvm::Value *Scalar)
@ Default
! No language constraints on evaluation order.
const TargetInfo & getTarget() const
llvm::Value * vectorWrapScalar16(llvm::Value *Op)
llvm::Function * LookupNeonLLVMIntrinsic(unsigned IntrinsicID, unsigned Modifier, llvm::Type *ArgTy, const CallExpr *E)
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
llvm::Value * EmitSEHExceptionCode()
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
llvm::Value * EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
EmitTargetBuiltinExpr - Emit the given builtin call.
void pushCleanupAfterFullExpr(CleanupKind Kind, As... A)
Queue a cleanup to be pushed after finishing the current full-expression, potentially with an active ...
RValue EmitCoroutineIntrinsic(const CallExpr *E, unsigned int IID)
llvm::Value * EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E)
Address EmitArrayToPointerDecay(const Expr *Array, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
void EmitCheck(ArrayRef< std::pair< llvm::Value *, SanitizerMask > > Checked, SanitizerHandler Check, ArrayRef< llvm::Constant * > StaticArgs, ArrayRef< llvm::Value * > DynamicArgs)
Create a basic block that will either trap or call a handler function in the UBSan runtime with the p...
RValue EmitBuiltinNewDeleteCall(const FunctionProtoType *Type, const CallExpr *TheCallExpr, bool IsDelete)
llvm::Value * EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty, const llvm::CmpInst::Predicate Fp, const llvm::CmpInst::Predicate Ip, const llvm::Twine &Name="")
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
llvm::ScalableVectorType * getSVEType(const SVETypeFlags &TypeFlags)
RValue emitRotate(const CallExpr *E, bool IsRotateRight)
llvm::Constant * EmitCheckSourceLocation(SourceLocation Loc)
Emit a description of a source location in a format suitable for passing to a runtime sanitizer handl...
void ErrorUnsupported(const Stmt *S, const char *Type)
ErrorUnsupported - Print out an error that codegen doesn't support the specified stmt yet.
Address EmitVAListRef(const Expr *E)
llvm::Value * EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty, bool negateForRightShift)
llvm::Value * EmitSVEMovl(const SVETypeFlags &TypeFlags, llvm::ArrayRef< llvm::Value * > Ops, unsigned BuiltinID)
void emitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty, SourceLocation Loc, SourceLocation AssumptionLoc, llvm::Value *Alignment, llvm::Value *OffsetValue=nullptr)
const TargetCodeGenInfo & getTargetHooks() const
llvm::Value * EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::Type * getEltType(const SVETypeFlags &TypeFlags)
void EmitAggExpr(const Expr *E, AggValueSlot AS)
EmitAggExpr - Emit the computation of the specified expression of aggregate type.
bool ShouldXRayInstrumentFunction() const
ShouldXRayInstrument - Return true if the current function should be instrumented with XRay nop sleds...
llvm::Value * EmitSVEPMull(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitSMELd1St1(SVETypeFlags TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitToMemory(llvm::Value *Value, QualType Ty)
EmitToMemory - Change a scalar value from its value representation to its in-memory representation.
RValue EmitOpenMPDevicePrintfCallExpr(const CallExpr *E)
bool IsInPreservedAIRegion
True if CodeGen currently emits code inside presereved access index region.
llvm::Value * EmitARCRetain(QualType type, llvm::Value *value)
bool AlwaysEmitXRayTypedEvents() const
AlwaysEmitXRayTypedEvents - Return true if clang must unconditionally emit XRay typed event handling ...
void SetSqrtFPAccuracy(llvm::Value *Val)
Set the minimum required accuracy of the given sqrt operation based on CodeGenOpts.
llvm::Value * EmitSVEScatterStore(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Function * generateBuiltinOSLogHelperFunction(const analyze_os_log::OSLogBufferLayout &Layout, CharUnits BufferAlignment)
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
CGCallee EmitCallee(const Expr *E)
Address CreateMemTemp(QualType T, const Twine &Name="tmp", Address *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignmen and cas...
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
void checkTargetFeatures(const CallExpr *E, const FunctionDecl *TargetDecl)
llvm::Value * EmitSMEZero(SVETypeFlags TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * BuildVector(ArrayRef< llvm::Value * > Ops)
llvm::Value * EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitARMCDEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertType(QualType T)
llvm::CallBase * EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, ArrayRef< llvm::Value * > args, const Twine &name="")
llvm::Value * EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T)
llvm::Value * EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void EmitARCIntrinsicUse(ArrayRef< llvm::Value * > values)
RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E)
llvm::Value * EmitSVEStructLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address EmitMSVAListRef(const Expr *E)
Emit a "reference" to a __builtin_ms_va_list; this is always the value of the expression,...
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
llvm::Value * EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt, llvm::Type *Ty, bool usgn, const char *name)
SmallVector< llvm::Type *, 2 > getSVEOverloadTypes(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
static bool hasAggregateEvaluationKind(QualType T)
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
llvm::Value * EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::Value * EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
llvm::Value * EmitSEHAbnormalTermination()
llvm::Value * EmitSMEReadWrite(SVETypeFlags TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E)
AggValueSlot CreateAggTemp(QualType T, const Twine &Name="tmp", Address *Alloca=nullptr)
CreateAggTemp - Create a temporary memory object for the given aggregate type.
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
llvm::Value * EmitSVEAllTruePred(const SVETypeFlags &TypeFlags)
RValue GetUndefRValue(QualType Ty)
GetUndefRValue - Get an appropriate 'undef' rvalue for the given type.
llvm::Type * SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags)
SVEBuiltinMemEltTy - Returns the memory element type for this memory access builtin.
llvm::LLVMContext & getLLVMContext()
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
llvm::Value * EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitCommonNeonBuiltinExpr(unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch)
llvm::Value * EmitNeonCall(llvm::Function *F, SmallVectorImpl< llvm::Value * > &O, const char *name, unsigned shift=0, bool rightshift=false)
llvm::Value * EmitAnnotationCall(llvm::Function *AnnotationFn, llvm::Value *AnnotatedVal, StringRef AnnotationStr, SourceLocation Location, const AnnotateAttr *Attr)
Emit an annotation call (intrinsic).
llvm::ScalableVectorType * getSVEPredType(const SVETypeFlags &TypeFlags)
llvm::Value * EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
llvm::Value * EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
This class organizes the cross-function state that is used while generating LLVM code.
llvm::Module & getModule() const
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false, bool AssumeConvergent=false)
Create or return a runtime function declaration with the specified type and name.
llvm::Constant * getBuiltinLibFunction(const FunctionDecl *FD, unsigned BuiltinID)
Given a builtin id for a function like "__builtin_fabsf", return a Function* for "fabsf".
Definition CGBuiltin.cpp:93
DiagnosticsEngine & getDiags() const
void ErrorUnsupported(const Stmt *S, const char *Type)
Print out an error that codegen doesn't support the specified stmt yet.
CGCUDARuntime & getCUDARuntime()
Return a reference to the configured CUDA runtime.
CGOpenCLRuntime & getOpenCLRuntime()
Return a reference to the configured OpenCL runtime.
const TargetInfo & getTarget() const
const llvm::DataLayout & getDataLayout() const
void Error(SourceLocation loc, StringRef error)
Emit a general error that something can't be done.
llvm::Constant * GetFunctionStart(const ValueDecl *Decl)
const llvm::Triple & getTriple() const
void DecorateInstructionWithTBAA(llvm::Instruction *Inst, TBAAAccessInfo TBAAInfo)
DecorateInstructionWithTBAA - Decorate the instruction with a TBAA tag.
llvm::Constant * CreateRuntimeVariable(llvm::Type *Ty, StringRef Name)
Create a new runtime global variable with the specified type and name.
TBAAAccessInfo getTBAAAccessInfo(QualType AccessType)
getTBAAAccessInfo - Get TBAA information that describes an access to an object of the given type.
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
CharUnits getNaturalPointeeTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
llvm::LLVMContext & getLLVMContext()
CGObjCRuntime & getObjCRuntime()
Return a reference to the configured Objective-C runtime.
void SetLLVMFunctionAttributes(GlobalDecl GD, const CGFunctionInfo &Info, llvm::Function *F, bool IsThunk)
Set the LLVM function attributes (sext, zext, etc).
void SetLLVMFunctionAttributesForDefinition(const Decl *D, llvm::Function *F)
Set the LLVM function attributes which only apply to a function definition.
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys=std::nullopt)
ConstantAddress GetAddrOfConstantCString(const std::string &Str, const char *GlobalName=nullptr)
Returns a pointer to a character array containing the literal and a terminating '\0' character.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.
Definition CGCall.cpp:1619
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition CGCall.cpp:666
const CGFunctionInfo & arrangeBuiltinFunctionCall(QualType resultType, const CallArgList &args)
Definition CGCall.cpp:654
llvm::Constant * emitAbstract(const Expr *E, QualType T)
Emit the result of the given expression as an abstract constant, asserting that it succeeded.
Information for lazily generating a cleanup.
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition CGCall.h:351
llvm::Value * getBitFieldPointer() const
Definition CGValue.h:408
llvm::Value * getPointer(CodeGenFunction &CGF) const
Definition CGValue.h:346
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition CGValue.h:39
static RValue getIgnored()
Definition CGValue.h:84
static RValue get(llvm::Value *V)
Definition CGValue.h:89
static RValue getAggregate(Address addr, bool isVolatile=false)
Definition CGValue.h:110
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition CGValue.h:96
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition CGValue.h:61
ReturnValueSlot - Contains the address where the return value of a function can be stored,...
Definition CGCall.h:355
virtual llvm::Value * encodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert the address of an instruction into a return address ...
Definition TargetInfo.h:142
virtual llvm::Value * decodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert a return address as stored by the system into the ac...
Definition TargetInfo.h:132
virtual int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const
Determines the DWARF register number for the stack pointer, for exception-handling purposes.
Definition TargetInfo.h:114
virtual llvm::Value * testFPKind(llvm::Value *V, unsigned BuiltinID, CGBuilderTy &Builder, CodeGenModule &CGM) const
Performs a target specific test of a floating point value for things like IsNaN, Infinity,...
Definition TargetInfo.h:151
Complex values, per C99 6.2.5p11.
Definition Type.h:2767
Represents a concrete matrix type with constant number of rows and columns.
Definition Type.h:3638
T * getAttr() const
Definition DeclBase.h:556
FunctionDecl * getAsFunction() LLVM_READONLY
Returns the function itself, or the templated function if this is a function template.
Definition DeclBase.cpp:228
bool hasAttr() const
Definition DeclBase.h:560
Concrete class used by the front-end to report problems and issues.
Definition Diagnostic.h:192
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
This represents one expression.
Definition Expr.h:110
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer,...
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr, bool isEvaluated=true) const
isIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition Expr.cpp:3077
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition Expr.cpp:3072
bool EvaluateAsFloat(llvm::APFloat &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsFloat - Return true if this is a constant which we can fold and convert to a floating point...
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
Definition Expr.cpp:3068
bool isPRValue() const
Definition Expr.h:272
@ NPC_ValueDependentIsNotNull
Specifies that a value-dependent expression should be considered to never be a null pointer constant.
Definition Expr.h:805
ExprObjectKind getObjectKind() const
getObjectKind - The object kind that this expression produces.
Definition Expr.h:438
bool EvaluateAsRValue(EvalResult &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsRValue - Return true if this is a constant which we can fold to an rvalue using any crazy t...
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition Expr.cpp:3545
Expr * IgnoreImpCasts() LLVM_READONLY
Skip past any implicit casts which might surround this expression until reaching a fixed point.
Definition Expr.cpp:3052
NullPointerConstantKind isNullPointerConstant(ASTContext &Ctx, NullPointerConstantValueDependence NPC) const
isNullPointerConstant - C99 6.3.2.3p3 - Test if this reduces down to a Null pointer constant.
Definition Expr.cpp:3904
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition Expr.cpp:330
QualType getType() const
Definition Expr.h:142
bool tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx, unsigned Type) const
If the current Expr is a pointer, this will try to statically determine the number of bytes available...
const ValueDecl * getAsBuiltinConstantDeclRef(const ASTContext &Context) const
If this expression is an unambiguous reference to a single declaration, in the style of __builtin_fun...
Definition Expr.cpp:290
Represents a member of a struct/union/class.
Definition Decl.h:2960
Represents a function declaration or definition.
Definition Decl.h:1917
const ParmVarDecl * getParamDecl(unsigned i) const
Definition Decl.h:2624
Represents a prototype with parameter type info, e.g.
Definition Type.h:4076
GlobalDecl - represents a global declaration.
Definition GlobalDecl.h:56
const Decl * getDecl() const
Definition GlobalDecl.h:103
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
@ Other
Other implicit parameter.
Definition Decl.h:1682
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition Decl.cpp:5194
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition Decl.h:274
Flags to identify the types for overloaded Neon builtins.
EltType getEltType() const
PipeType - OpenCL20.
Definition Type.h:6532
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition Type.h:2820
QualType getPointeeType() const
Definition Type.h:2830
A (possibly-)qualified type.
Definition Type.h:736
bool isVolatileQualified() const
Determine whether this type is volatile-qualified.
Definition Type.h:6767
bool isWebAssemblyFuncrefType() const
Returns true if it is a WebAssembly Funcref Type.
Definition Type.cpp:2728
LangAS getAddressSpace() const
Return the address space of this type.
Definition Type.h:6809
QualType getCanonicalType() const
Definition Type.h:6736
bool isWebAssemblyExternrefType() const
Returns true if it is a WebAssembly Externref Type.
Definition Type.cpp:2724
The collection of all-type qualifiers we support.
Definition Type.h:146
Flags to identify the types for overloaded SVE builtins.
bool isZExtReturn() const
bool isReverseUSDOT() const
bool isOverloadNone() const
MemEltType getMemEltType() const
bool isGatherLoad() const
bool isOverloadCvt() const
EltType getEltType() const
bool isOverloadDefault() const
bool isPrefetch() const
bool isOverloadWhileRW() const
bool isTupleSet() const
bool isReverseMergeAnyAccOp() const
bool isTupleGet() const
bool isInsertOp1SVALL() const
bool isOverloadWhile() const
bool isAppendSVALL() const
bool isReverseMergeAnyBinOp() const
bool isStructStore() const
bool isTupleCreate() const
bool isGatherPrefetch() const
bool hasSplatOperand() const
MergeType getMergeType() const
bool isByteIndexed() const
bool isStructLoad() const
unsigned getSplatOperand() const
bool isScatterStore() const
bool isReverseCompare() const
Scope - A scope is a transient data structure that is used while parsing the program.
Definition Scope.h:41
Encodes a location in the source.
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Stmt.cpp:337
Exposes information about the current target.
Definition TargetInfo.h:207
TargetOptions & getTargetOpts() const
Retrieve the target options.
Definition TargetInfo.h:290
virtual bool hasLegalHalfType() const
Determine whether _Float16 is supported on this target.
Definition TargetInfo.h:649
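A hedged sketch of the usual guard (CGF is assumed to be the current CodeGenFunction):

  if (!CGF.getTarget().hasLegalHalfType()) {
    // No native _Float16: promote half operands to float, perform the
    // operation, then truncate the result back to half.
  }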
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
bool isLittleEndian() const
unsigned getMaxOpenCLWorkGroupSize() const
Definition TargetInfo.h:811
bool isBigEndian() const
virtual bool checkArithmeticFenceSupported() const
Controls if __arithmetic_fence is supported in the targeted backend.
unsigned getSuitableAlign() const
Return the alignment that is the largest alignment ever used for any scalar/SIMD data type on the target machine you are compiling for.
Definition TargetInfo.h:685
virtual std::string_view getClobbers() const =0
Returns a string of target-specific clobbers, in LLVM format.
CodeObjectVersionKind CodeObjectVersion
Code object version for AMDGPU.
The base class of the type hierarchy.
Definition Type.h:1577
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or because it is the injected-class-name type of a class template or class template partial specialization.
Definition Type.cpp:1799
bool isBlockPointerType() const
Definition Type.h:6944
bool isVoidType() const
Definition Type.h:7254
bool isBooleanType() const
Definition Type.h:7370
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char, signed char, short, int, long..], or an enum decl which has a signed representation.
Definition Type.cpp:2063
bool isArrayType() const
Definition Type.h:7002
bool isPointerType() const
Definition Type.h:6936
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition Type.h:7286
const T * castAs() const
Member-template castAs<specific type>.
Definition Type.h:7527
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:631
bool isBitIntType() const
Definition Type.h:7160
bool isFloatingType() const
Definition Type.cpp:2166
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true for _Bool], or an enum decl which has an unsigned representation.
Definition Type.cpp:2113
const T * getAs() const
Member-template getAs<specific type>.
Definition Type.h:7460
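A short sketch of the usual pattern with these predicates and casts (T is assumed to be the QualType of a builtin argument):

  if (T->isPointerType()) {
    // castAs<> asserts that the type really is a PointerType.
    QualType Pointee = T->castAs<PointerType>()->getPointeeType();
    (void)Pointee;
  } else if (const auto *VT = T->getAs<VectorType>()) {
    // getAs<> returns nullptr on mismatch, so it doubles as a test.
    (void)VT;
  }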
QualType getType() const
Definition Decl.h:712
QualType getType() const
Definition Value.cpp:233
Represents a GCC generic vector type.
Definition Type.h:3409
SmallVector< OSLogBufferItem, 4 > Items
Definition OSLog.h:113
unsigned char getNumArgsByte() const
Definition OSLog.h:148
unsigned char getSummaryByte() const
Definition OSLog.h:139
Defines the clang::TargetInfo interface.
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
llvm::Constant * initializationPatternFor(CodeGenModule &, llvm::Type *)
TypeEvaluationKind
The kind of evaluation to perform on values of a particular type.
@ EHCleanup
Denotes a cleanup that should run when a scope is exited using exceptional control flow (a throw statement leading to stack unwinding).
constexpr XRayInstrMask Typed
Definition XRayInstr.h:42
constexpr XRayInstrMask Custom
Definition XRayInstr.h:41
bool computeOSLogBufferLayout(clang::ASTContext &Ctx, const clang::CallExpr *E, OSLogBufferLayout &layout)
Definition OSLog.cpp:181
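A hedged sketch of driving the layout computation for a __builtin_os_log_format call (Ctx and the CallExpr *E are assumed; the summary, argument-count, and Items accessors are the ones listed above):

  analyze_os_log::OSLogBufferLayout Layout;
  if (analyze_os_log::computeOSLogBufferLayout(Ctx, E, Layout)) {
    unsigned char Summary = Layout.getSummaryByte();
    unsigned char NumArgs = Layout.getNumArgsByte();
    // Layout.Items then describes each argument written to the buffer.
    (void)Summary; (void)NumArgs;
  }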
const void * Store
Store - This opaque type encapsulates an immutable mapping from locations to values.
Definition StoreRef.h:27
bool Dup(InterpState &S, CodePtr OpPC)
Definition Interp.h:796
bool Call(InterpState &S, CodePtr OpPC, const Function *Func)
Definition Interp.h:1628
bool Ret(InterpState &S, CodePtr &PC, APValue &Result)
Definition Interp.h:182
bool Zero(InterpState &S, CodePtr OpPC)
Definition Interp.h:1492
bool Neg(InterpState &S, CodePtr OpPC)
Definition Interp.h:434
bool Load(InterpState &S, CodePtr OpPC)
Definition Interp.h:1174
bool Cast(InterpState &S, CodePtr OpPC)
Definition Interp.h:1430
bool isa(CodeGen::Address addr)
Definition Address.h:155
@ OK_BitField
A bitfield object is a bitfield on a C or C++ record.
Definition Specifiers.h:145
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
Expr * Cond
@ C
Languages that the frontend can parse and compile.
@ Asm
Assembly: we accept this only so that we can preprocess it.
@ Result
The result type of a method or function.
U cast(CodeGen::Address addr)
Definition Address.h:152
YAML serialization mapping.
Definition Dominators.h:30
#define true
Definition stdbool.h:21
llvm::PointerType * ConstGlobalsPtrTy
void* in the address space for constant globals
llvm::IntegerType * Int8Ty
i8; part of the cached i8, i16, i32, and i64 integer types.
llvm::Type * HalfTy
half; part of the cached half, bfloat, float, and double floating-point types.
EvalResult is a struct with detailed info about an evaluated expression.
Definition Expr.h:622
APValue Val
Val - This is the value the expression can be folded to.
Definition Expr.h:624
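A minimal sketch of producing one (Ctx and an Expr *E are assumed):

  Expr::EvalResult Result;
  if (E->EvaluateAsRValue(Result, Ctx)) {
    // Result.Val holds the folded APValue.
    APValue &V = Result.Val;
    (void)V;
  }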
bool has(SanitizerMask K) const
Check if a certain (single) sanitizer is enabled.
Definition Sanitizers.h:159
bool has(XRayInstrMask K) const
Definition XRayInstr.h:48
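A hedged sketch of the two feature tests (CGF is the current CodeGenFunction and CGM its CodeGenModule):

  if (CGF.SanOpts.has(SanitizerKind::Alignment)) {
    // Emit the extra alignment check before the memory access.
  }
  if (CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
          XRayInstrKind::Custom)) {
    // __xray_customevent may lower to its intrinsic.
  }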
#define sinh(__x)
Definition tgmath.h:373
#define asin(__x)
Definition tgmath.h:112
#define scalbln(__x, __y)
Definition tgmath.h:1182
#define sqrt(__x)
Definition tgmath.h:520
#define acos(__x)
Definition tgmath.h:83
#define fmin(__x, __y)
Definition tgmath.h:780
#define exp(__x)
Definition tgmath.h:431
#define ilogb(__x)
Definition tgmath.h:851
#define copysign(__x, __y)
Definition tgmath.h:618
#define erf(__x)
Definition tgmath.h:636
#define atanh(__x)
Definition tgmath.h:228
#define remquo(__x, __y, __z)
Definition tgmath.h:1111
#define nextafter(__x, __y)
Definition tgmath.h:1055
#define frexp(__x, __y)
Definition tgmath.h:816
#define asinh(__x)
Definition tgmath.h:199
#define erfc(__x)
Definition tgmath.h:653
#define atan2(__x, __y)
Definition tgmath.h:566
#define nexttoward(__x, __y)
Definition tgmath.h:1073
#define hypot(__x, __y)
Definition tgmath.h:833
#define exp2(__x)
Definition tgmath.h:670
#define sin(__x)
Definition tgmath.h:286
#define cbrt(__x)
Definition tgmath.h:584
#define log2(__x)
Definition tgmath.h:970
#define llround(__x)
Definition tgmath.h:919
#define cosh(__x)
Definition tgmath.h:344
#define trunc(__x)
Definition tgmath.h:1216
#define fmax(__x, __y)
Definition tgmath.h:762
#define ldexp(__x, __y)
Definition tgmath.h:868
#define acosh(__x)
Definition tgmath.h:170
#define tgamma(__x)
Definition tgmath.h:1199
#define scalbn(__x, __y)
Definition tgmath.h:1165
#define round(__x)
Definition tgmath.h:1148
#define fmod(__x, __y)
Definition tgmath.h:798
#define llrint(__x)
Definition tgmath.h:902
#define tan(__x)
Definition tgmath.h:315
#define cos(__x)
Definition tgmath.h:257
#define log10(__x)
Definition tgmath.h:936
#define fabs(__x)
Definition tgmath.h:549
#define pow(__x, __y)
Definition tgmath.h:490
#define log1p(__x)
Definition tgmath.h:953
#define rint(__x)
Definition tgmath.h:1131
#define expm1(__x)
Definition tgmath.h:687
#define remainder(__x, __y)
Definition tgmath.h:1090
#define fdim(__x, __y)
Definition tgmath.h:704
#define lgamma(__x)
Definition tgmath.h:885
#define tanh(__x)
Definition tgmath.h:402
#define lrint(__x)
Definition tgmath.h:1004
#define atan(__x)
Definition tgmath.h:141
#define floor(__x)
Definition tgmath.h:722
#define ceil(__x)
Definition tgmath.h:601
#define log(__x)
Definition tgmath.h:460
#define logb(__x)
Definition tgmath.h:987
#define nearbyint(__x)
Definition tgmath.h:1038
#define lround(__x)
Definition tgmath.h:1021
#define fma(__x, __y, __z)
Definition tgmath.h:742
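Taken together, the tgmath.h macros above dispatch on argument type. A hedged C++ illustration of the same idea via the <cmath> overloads (tgmath.h itself is a C header), showing calls that builtin emission can map to LLVM intrinsics when math-errno rules allow:

  #include <cmath>
  float f = std::sqrt(2.0f);       // float overload, sqrtf underneath
  double d = std::sqrt(2.0);       // double overload, sqrt underneath
  long double l = std::sqrt(2.0L); // long double overload, sqrtl underneath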